attilaz 2018-12-14 22:09:37 +01:00
parent d2812b9318
commit ead0e5b0fb
29 changed files with 2995 additions and 0 deletions

examples/39-assao/assao.cpp (new file, 1147 additions)

File diff suppressed because it is too large.

@@ -0,0 +1,103 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
IMAGE2D_WR(s_target, r8, 0);
SAMPLER2DARRAY(s_finalSSAO, 1);
// unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!
vec4 UnpackEdges( float _packedVal )
{
uint packedVal = uint(_packedVal * 255.5);
vec4 edgesLRTB;
edgesLRTB.x = float((packedVal >> 6) & 0x03) / 3.0; // there's really no need for the mask (it's an 8-bit input) but I'll leave it in so it doesn't cause any trouble in the future
edgesLRTB.y = float((packedVal >> 4) & 0x03) / 3.0;
edgesLRTB.z = float((packedVal >> 2) & 0x03) / 3.0;
edgesLRTB.w = float((packedVal >> 0) & 0x03) / 3.0;
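// adding u_invSharpness biases the unpacked edge values towards 1.0 (no edge), so a lower blur sharpness lets the final blur bleed further across detected edges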
return saturate( edgesLRTB + u_invSharpness );
}
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
if (all(lessThan(dtID.xy, u_rect.zw) ) )
{
float ao;
uvec2 pixPos = uvec2(dtID.xy);
uvec2 pixPosHalf = pixPos / uvec2(2, 2);
// calculate index in the four deinterleaved source array texture
int mx = (int(pixPos.x) % 2);
#if BGFX_SHADER_LANGUAGE_GLSL
int dimy = imageSize(s_target).y;
int my = (int(dimy-1-pixPos.y) % 2);
#else
int my = (int(pixPos.y) % 2);
#endif
int ic = mx + my * 2; // center index
int ih = (1-mx) + my * 2; // neighbouring, horizontal
int iv = mx + (1-my) * 2; // neighbouring, vertical
int id = (1-mx) + (1-my)*2; // diagonal
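// ic is the deinterleaved half-res layer that contains this full-res pixel; ih/iv/id are the layers holding its horizontal, vertical and diagonal neighbours, all fetched around pixPosHalf below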
vec2 centerVal = texelFetch(s_finalSSAO, ivec3(pixPosHalf, ic), 0 ).xy;
ao = centerVal.x;
#if 1 // change to 0 if you want to disable last pass high-res blur (for debugging purposes, etc.)
vec4 edgesLRTB = UnpackEdges( centerVal.y );
// return 1.0 - vec4( edgesLRTB.x, edgesLRTB.y * 0.5 + edgesLRTB.w * 0.5, edgesLRTB.z, 0.0 ); // debug show edges
// convert index shifts to sampling offsets
float fmx = float(mx);
float fmy = float(my);
// in case of an edge, push sampling offsets away from the edge (towards pixel center)
float fmxe = (edgesLRTB.y - edgesLRTB.x);
float fmye = (edgesLRTB.w - edgesLRTB.z);
// calculate final sampling offsets and sample using bilinear filter
#if BGFX_SHADER_LANGUAGE_GLSL
vec2 uvH = (dtID.xy + vec2( fmx + fmxe - 0.5, 1.0 - (0.5 - fmy) ) ) * 0.5 * u_halfViewportPixelSize;
#else
vec2 uvH = (dtID.xy + vec2( fmx + fmxe - 0.5, 0.5 - fmy ) ) * 0.5 * u_halfViewportPixelSize;
#endif
float aoH = texture2DArrayLod(s_finalSSAO, vec3( uvH, ih ), 0 ).x;
#if BGFX_SHADER_LANGUAGE_GLSL
vec2 uvV = (dtID.xy + vec2( 0.5 - fmx, 1.0 - (fmy - 0.5 + fmye) ) ) * 0.5 * u_halfViewportPixelSize;
#else
vec2 uvV = (dtID.xy + vec2( 0.5 - fmx, fmy - 0.5 + fmye ) ) * 0.5 * u_halfViewportPixelSize;
#endif
float aoV = texture2DArrayLod(s_finalSSAO, vec3( uvV, iv ), 0 ).x;
#if BGFX_SHADER_LANGUAGE_GLSL
vec2 uvD = (dtID.xy + vec2( fmx - 0.5 + fmxe, 1.0 - (fmy - 0.5 + fmye) ) ) * 0.5 * u_halfViewportPixelSize;
#else
vec2 uvD = (dtID.xy + vec2( fmx - 0.5 + fmxe, fmy - 0.5 + fmye ) ) * 0.5 * u_halfViewportPixelSize;
#endif
float aoD = texture2DArrayLod(s_finalSSAO, vec3( uvD, id ), 0 ).x;
// reduce weight for samples near edge - if the edge is on both sides, weight goes to 0
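// .x = centre sample, .y = horizontal neighbour (gated by the left/right edge values), .z = vertical neighbour (top/bottom edges), .w = diagonal neighbour (average of the two)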
vec4 blendWeights;
blendWeights.x = 1.0;
blendWeights.y = (edgesLRTB.x + edgesLRTB.y) * 0.5;
blendWeights.z = (edgesLRTB.z + edgesLRTB.w) * 0.5;
blendWeights.w = (blendWeights.y + blendWeights.z) * 0.5;
// calculate weighted average
float blendWeightsSum = dot( blendWeights, vec4( 1.0, 1.0, 1.0, 1.0 ) );
ao = dot( vec4( ao, aoH, aoV, aoD ), blendWeights ) / blendWeightsSum;
#endif
ao = pow(ao,1.0/2.2);
imageStore(s_target, ivec2(dtID.xy), ao.xxxx);
}
}

@@ -0,0 +1,50 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
IMAGE2D_WR(s_target, r8, 0);
SAMPLER2DARRAY(s_finalSSAO, 1);
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
uvec2 dim = imageSize(s_target).xy;
if (all(lessThan(dtID.xy, dim) ) )
{
uvec2 basePos = uvec2(dtID.xy) * 2;
vec2 baseUV = (vec2(basePos) + vec2( 0.5, 0.5 ) ) * u_halfViewportPixelSize;
vec2 gatherUV = (vec2(basePos) + vec2( 1.0, 1.0 ) ) * u_halfViewportPixelSize;
float avg = 0.0;
float minV = 1.0;
float maxV = 0.0;
UNROLL
for( int i = 0; i < 4; i++ )
{
vec4 vals = textureGather(s_finalSSAO, vec3( gatherUV, i ) );
// apply the same modifications that would have been applied in the main shader
vals = u_effectShadowStrength * vals;
vals = 1-vals;
vals = pow( saturate( vals ), u_effectShadowPow.xxxx );
avg += dot( vec4( vals.x, vals.y, vals.z, vals.w ), vec4( 1.0 / 16.0, 1.0 / 16.0, 1.0 / 16.0, 1.0 / 16.0 ) );
maxV = max( maxV, max( max( vals.x, vals.y ), max( vals.z, vals.w ) ) );
minV = min( minV, min( min( vals.x, vals.y ), min( vals.z, vals.w ) ) );
}
float minMaxDiff = maxV - minV;
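// importance is driven by local AO contrast: a large min/max spread marks areas that benefit from extra adaptive samples, while flat areas get few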
imageStore(s_target, ivec2(dtID.xy), pow( saturate( minMaxDiff * 2.0 ), 0.8 ).xxxx);
}
}

@@ -0,0 +1,520 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
// progressive poisson-like pattern; x, y are in [-1, 1] range, .z is length( vec2(x,y) ), .w is log2( z )
#define INTELSSAO_MAIN_DISK_SAMPLE_COUNT (32)
CONST(vec4 g_samplePatternMain[INTELSSAO_MAIN_DISK_SAMPLE_COUNT]) =
{
{ 0.78488064, 0.56661671, 1.500000, -0.126083}, { 0.26022232, -0.29575172, 1.500000, -1.064030}, { 0.10459357, 0.08372527, 1.110000, -2.730563}, {-0.68286800, 0.04963045, 1.090000, -0.498827},
{-0.13570161, -0.64190155, 1.250000, -0.532765}, {-0.26193795, -0.08205118, 0.670000, -1.783245}, {-0.61177456, 0.66664219, 0.710000, -0.044234}, { 0.43675563, 0.25119025, 0.610000, -1.167283},
{ 0.07884444, 0.86618668, 0.640000, -0.459002}, {-0.12790935, -0.29869005, 0.600000, -1.729424}, {-0.04031125, 0.02413622, 0.600000, -4.792042}, { 0.16201244, -0.52851415, 0.790000, -1.067055},
{-0.70991218, 0.47301072, 0.640000, -0.335236}, { 0.03277707, -0.22349690, 0.600000, -1.982384}, { 0.68921727, 0.36800742, 0.630000, -0.266718}, { 0.29251814, 0.37775412, 0.610000, -1.422520},
{-0.12224089, 0.96582592, 0.600000, -0.426142}, { 0.11071457, -0.16131058, 0.600000, -2.165947}, { 0.46562141, -0.59747696, 0.600000, -0.189760}, {-0.51548797, 0.11804193, 0.600000, -1.246800},
{ 0.89141309, -0.42090443, 0.600000, 0.028192}, {-0.32402530, -0.01591529, 0.600000, -1.543018}, { 0.60771245, 0.41635221, 0.600000, -0.605411}, { 0.02379565, -0.08239821, 0.600000, -3.809046},
{ 0.48951152, -0.23657045, 0.600000, -1.189011}, {-0.17611565, -0.81696892, 0.600000, -0.513724}, {-0.33930185, -0.20732205, 0.600000, -1.698047}, {-0.91974425, 0.05403209, 0.600000, 0.062246},
{-0.15064627, -0.14949332, 0.600000, -1.896062}, { 0.53180975, -0.35210401, 0.600000, -0.758838}, { 0.41487166, 0.81442589, 0.600000, -0.505648}, {-0.24106961, -0.32721516, 0.600000, -1.665244}
};
// these values can be changed (up to SSAO_MAX_TAPS) with no changes required elsewhere; values for the 4th and 5th presets are ignored but the array is needed to avoid compilation errors
// the actual number of texture samples is two times this value (each "tap" has two symmetrical depth texture samples)
CONST(uint g_numTaps[5]) = { 3, 5, 12, 0, 0 };
// an example of higher quality low/medium/high settings
// CONST(uint g_numTaps[5]) = { 4, 9, 16, 0, 0 };
// ** WARNING ** if changing anything here, please remember to update the corresponding C++ code!
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Optional parts that can be enabled for a required quality preset level and above (0 == Low, 1 == Medium, 2 == High, 3 == Highest/Adaptive, 4 == reference/unused )
// Each has its own cost. To disable just set to 5 or above.
//
// (experimental) tilts the disk (although only half of the samples!) towards surface normal; this helps with effect uniformity between objects but reduces effect distance and has other side-effects
#define SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET (99) // to disable simply set to 99 or similar
#define SSAO_TILT_SAMPLES_AMOUNT (0.4)
//
#define SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET (1) // to disable simply set to 99 or similar
#define SSAO_HALOING_REDUCTION_AMOUNT (0.6) // values from 0.0 - 1.0, 1.0 means max weighting (will cause artifacts, 0.8 is more reasonable)
//
#define SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (2) // to disable simply set to 99 or similar
#define SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD (0.5) // use 0-0.1 for super-sharp normal-based edges
//
#define SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET (1) // whether to use DetailAOStrength; to disable simply set to 99 or similar
//
#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too
#define SSAO_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically
//
// !!warning!! the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for
// testing purposes, it will not yield performance gains (or correct results)
#define SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1)
//
#define SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (99) // 99 means disabled; only helpful if artifacts at the edges caused by lack of out of screen depth data are not acceptable with the depth sampler in either clamp or mirror modes
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
SAMPLER2D(s_viewspaceDepthSource, 0);
SAMPLER2D(s_viewspaceDepthSourceMirror, 1);
IMAGE2D_RO(s_normalmapSource, rgba8, 2);
UIMAGE2D_RO(s_loadCounter, r32ui, 3);
SAMPLER2D(s_importanceMap, 4);
IMAGE2D_ARRAY_RO(s_baseSSAO, rg8, 5);
IMAGE2D_ARRAY_WR(s_target, rg8, 6);
// packing/unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!
float PackEdges( vec4 edgesLRTB )
{
// ivec4 edgesLRTBi = ivec4( saturate( edgesLRTB ) * 3.0 + 0.5 );
// return ( (edgesLRTBi.x << 6) + (edgesLRTBi.y << 4) + (edgesLRTBi.z << 2) + (edgesLRTBi.w << 0) ) / 255.0;
// optimized, should be same as above
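// e.g. edgesLRTB = (1.0, 2/3, 1/3, 0.0) rounds to (3, 2, 1, 0) and packs to (3*64 + 2*16 + 1*4 + 0) / 255 = 228/255, matching the bit-shift version above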
edgesLRTB = round( saturate( edgesLRTB ) * 3.05 );
return dot( edgesLRTB, vec4( 64.0 / 255.0, 16.0 / 255.0, 4.0 / 255.0, 1.0 / 255.0 ) ) ;
}
vec3 NDCToViewspace( vec2 pos, float viewspaceDepth )
{
vec3 ret;
ret.xy = (u_ndcToViewMul * pos.xy + u_ndcToViewAdd) * viewspaceDepth;
ret.z = viewspaceDepth;
return ret;
}
// calculate effect radius and fit our screen sampling pattern inside it
void CalculateRadiusParameters( const float pixCenterLength, const vec2 pixelDirRBViewspaceSizeAtCenterZ, out float pixLookupRadiusMod, out float effectRadius, out float falloffCalcMulSq )
{
effectRadius = u_effectRadius;
// leaving this out for performance reasons: use something similar if radius needs to scale based on distance
//effectRadius *= pow( pixCenterLength, u_radiusDistanceScalingFunctionPow);
// when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts
const float tooCloseLimitMod = saturate( pixCenterLength * u_effectSamplingRadiusNearLimitRec ) * 0.8 + 0.2;
effectRadius *= tooCloseLimitMod;
// 0.85 is to reduce the radius to allow for more samples on a slope to still stay within influence
pixLookupRadiusMod = (0.85 * effectRadius) / pixelDirRBViewspaceSizeAtCenterZ.x;
// used to calculate falloff (both for AO samples and per-sample weights)
falloffCalcMulSq= -1.0f / (effectRadius*effectRadius);
}
vec4 CalculateEdges( const float centerZ, const float leftZ, const float rightZ, const float topZ, const float bottomZ )
{
// slope-sensitive depth-based edge detection
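// result convention: 1.0 = no edge, 0.0 = edge; the threshold scales with centerZ so the test is depth-relative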
vec4 edgesLRTB = vec4( leftZ, rightZ, topZ, bottomZ ) - centerZ;
vec4 edgesLRTBSlopeAdjusted = edgesLRTB + edgesLRTB.yxwz;
edgesLRTB = min( abs( edgesLRTB ), abs( edgesLRTBSlopeAdjusted ) );
return saturate( ( 1.3 - edgesLRTB / (centerZ * 0.040) ) );
// cheaper version but has artifacts
// edgesLRTB = abs( vec4( leftZ, rightZ, topZ, bottomZ ) - centerZ );
// return saturate( ( 1.3 - edgesLRTB / (pixZ * 0.06 + 0.1) ) );
}
vec3 DecodeNormal( vec3 encodedNormal )
{
vec3 normal = encodedNormal * u_normalsUnpackMul.xxx + u_normalsUnpackAdd.xxx;
#if SSAO_ENABLE_NORMAL_WORLD_TO_VIEW_CONVERSION
normal = vec3( dot(normal, u_normalsWorldToViewspaceMatrix0.xyz),
dot(normal, u_normalsWorldToViewspaceMatrix1.xyz),
dot(normal, u_normalsWorldToViewspaceMatrix2.xyz));
#endif
// normal = normalize( normal ); // normalize adds around 2.5% cost on High settings but makes little (PSNR 66.7) visual difference when normals are as in the sample (stored in R8G8B8A8_UNORM,
// // decoded in the shader), however it will likely be required if using different encoding/decoding or the inputs are not normalized, etc.
return normal;
}
vec3 LoadNormal( ivec2 pos )
{
vec3 encodedNormal = imageLoad(s_normalmapSource, pos).xyz;
return DecodeNormal( encodedNormal );
}
vec3 LoadNormal( ivec2 pos, ivec2 offset )
{
vec3 encodedNormal = imageLoad(s_normalmapSource, pos + offset ).xyz;
return DecodeNormal( encodedNormal );
}
// all vectors in viewspace
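// NdotD is the cosine of the angle between the surface normal and the direction to the sample; anything below u_effectHorizonAngleThreshold contributes nothing, and falloffMult fades the contribution quadratically to zero at the effect radius (falloffCalcMulSq == -1/radius^2)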
float CalculatePixelObscurance( vec3 pixelNormal, vec3 hitDelta, float falloffCalcMulSq )
{
float lengthSq = dot( hitDelta, hitDelta );
float NdotD = dot(pixelNormal, hitDelta) / sqrt(lengthSq);
float falloffMult = max( 0.0, lengthSq * falloffCalcMulSq + 1.0 );
return max( 0, NdotD - u_effectHorizonAngleThreshold ) * falloffMult;
}
void SSAOTapInner( const int qualityLevel, inout float obscuranceSum, inout float weightSum, const vec2 samplingUV, const float mipLevel, const vec3 pixCenterPos, const vec3 negViewspaceDir,vec3 pixelNormal, const float falloffCalcMulSq, const float weightMod, const int dbgTapIndex)
{
// get depth at sample
float viewspaceSampleZ = texture2DLod(s_viewspaceDepthSource, samplingUV.xy, mipLevel ).x;
// convert to viewspace
vec3 hitPos = NDCToViewspace( samplingUV.xy, viewspaceSampleZ ).xyz;
vec3 hitDelta = hitPos - pixCenterPos;
float obscurance = CalculatePixelObscurance( pixelNormal, hitDelta, falloffCalcMulSq );
float weight = 1.0;
if( qualityLevel >= SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET )
{
//float reduct = max( 0, dot( hitDelta, negViewspaceDir ) );
float reduct = max( 0, -hitDelta.z ); // cheaper, less correct version
reduct = saturate( reduct * u_negRecEffectRadius + 2.0 ); // saturate( 2.0 - reduct / u_effectRadius );
weight = SSAO_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - SSAO_HALOING_REDUCTION_AMOUNT);
}
weight *= weightMod;
obscuranceSum += obscurance * weight;
weightSum += weight;
}
void SSAOTap( const int qualityLevel, inout float obscuranceSum, inout float weightSum, const int tapIndex, const mat2 rotScale, const vec3 pixCenterPos, const vec3 negViewspaceDir, vec3 pixelNormal, const vec2 normalizedScreenPos, const float mipOffset, const float falloffCalcMulSq, float weightMod, vec2 normXY, float normXYLength)
{
vec2 sampleOffset;
float samplePow2Len;
// patterns
{
vec4 newSample = g_samplePatternMain[tapIndex];
sampleOffset = mul( rotScale, newSample.xy );
samplePow2Len = newSample.w; // precalculated, same as: samplePow2Len = log2( length( newSample.xy ) );
weightMod *= newSample.z;
}
// snap to pixel center (more correct obscurance math, avoids artifacts)
sampleOffset = round(sampleOffset);
// calculate MIP based on the sample distance from the centre, similar to the approach described
// in http://graphics.cs.williams.edu/papers/SAOHPG12/.
float mipLevel = ( qualityLevel < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET )?(0):(samplePow2Len + mipOffset);
#if BGFX_SHADER_LANGUAGE_GLSL
sampleOffset.y = -sampleOffset.y;
#endif
vec2 samplingUV = sampleOffset * u_viewport2xPixelSize + normalizedScreenPos;
SSAOTapInner( qualityLevel, obscuranceSum, weightSum, samplingUV, mipLevel, pixCenterPos, negViewspaceDir, pixelNormal, falloffCalcMulSq, weightMod, tapIndex * 2);
// for the second tap, just use the mirrored offset
vec2 sampleOffsetMirroredUV = -sampleOffset;
// tilt the second set of samples so that the disk is effectively rotated by the normal
// effective at removing one set of artifacts, but too expensive for lower quality settings
if( qualityLevel >= SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET )
{
float dotNorm = dot( sampleOffsetMirroredUV, normXY );
sampleOffsetMirroredUV -= dotNorm * normXYLength * normXY;
sampleOffsetMirroredUV = round(sampleOffsetMirroredUV);
}
// snap to pixel center (more correct obscurance math, avoids artifacts)
vec2 samplingMirroredUV = sampleOffsetMirroredUV * u_viewport2xPixelSize + normalizedScreenPos;
SSAOTapInner( qualityLevel, obscuranceSum, weightSum, samplingMirroredUV, mipLevel, pixCenterPos, negViewspaceDir, pixelNormal, falloffCalcMulSq, weightMod, tapIndex * 2 + 1);
}
// this function is designed to only work with half/half depth at the moment - there are a couple of hardcoded paths that expect pixel/texel size, so it will not work for full res
void GenerateSSAOShadowsInternal( out float outShadowTerm, out vec4 outEdges, out float outWeight,
const vec2 SVPos, const int qualityLevel, bool adaptiveBase)
{
vec2 SVPosRounded = trunc( SVPos );
uvec2 SVPosui = uvec2( SVPosRounded ); //same as uvec2( SVPos )
const uint numberOfTaps = (adaptiveBase)?(SSAO_ADAPTIVE_TAP_BASE_COUNT) : ( g_numTaps[qualityLevel] );
float pixZ, pixLZ, pixTZ, pixRZ, pixBZ;
#if BGFX_SHADER_LANGUAGE_GLSL
vec4 valuesUL = textureGather(s_viewspaceDepthSourceMirror, SVPosRounded * u_halfViewportPixelSize + vec2(0.0,u_halfViewportPixelSize.y)).wzyx;
vec4 valuesBR = textureGatherOffset(s_viewspaceDepthSourceMirror, SVPosRounded * u_halfViewportPixelSize + vec2(0.0,u_halfViewportPixelSize.y), ivec2( 1, -1 ) ).wzyx;
#else
vec4 valuesUL = textureGather(s_viewspaceDepthSourceMirror, SVPosRounded * u_halfViewportPixelSize );
vec4 valuesBR = textureGatherOffset(s_viewspaceDepthSourceMirror, SVPosRounded * u_halfViewportPixelSize, ivec2( 1, 1 ) );
#endif
// get this pixel's viewspace depth
pixZ = valuesUL.y;
// get left right top bottom neighbouring pixels for edge detection (gets compiled out on qualityLevel == 0)
pixLZ = valuesUL.x;
pixTZ = valuesUL.z;
pixRZ = valuesBR.z;
pixBZ = valuesBR.x;
vec2 normalizedScreenPos = SVPosRounded * u_viewport2xPixelSize + u_viewport2xPixelSize_x_025;
vec3 pixCenterPos = NDCToViewspace( normalizedScreenPos, pixZ );
// Load this pixel's viewspace normal
uvec2 fullResCoord = uvec2(SVPosui * 2 + u_perPassFullResCoordOffset.xy);
vec3 pixelNormal = LoadNormal( ivec2(fullResCoord) );
const vec2 pixelDirRBViewspaceSizeAtCenterZ = pixCenterPos.z * u_ndcToViewMul * u_viewport2xPixelSize; // optimized approximation of: vec2 pixelDirRBViewspaceSizeAtCenterZ = NDCToViewspace( normalizedScreenPos.xy + u_viewportPixelSize.xy, pixCenterPos.z ).xy - pixCenterPos.xy;
float pixLookupRadiusMod;
float falloffCalcMulSq;
// calculate effect radius and fit our screen sampling pattern inside it
float effectViewspaceRadius;
CalculateRadiusParameters( length( pixCenterPos ), pixelDirRBViewspaceSizeAtCenterZ, pixLookupRadiusMod, effectViewspaceRadius, falloffCalcMulSq );
// calculate samples rotation/scaling
mat2 rotScale;
{
// reduce effect radius near the screen edges slightly; ideally, one would render a larger depth buffer (5% on each side) instead
if( !adaptiveBase && (qualityLevel >= SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET) )
{
float nearScreenBorder = min( min( normalizedScreenPos.x, 1.0 - normalizedScreenPos.x ), min( normalizedScreenPos.y, 1.0 - normalizedScreenPos.y ) );
nearScreenBorder = saturate( 10.0 * nearScreenBorder + 0.6 );
pixLookupRadiusMod *= nearScreenBorder;
}
// load & update pseudo-random rotation matrix
#if BGFX_SHADER_LANGUAGE_GLSL
uint pseudoRandomIndex = uint( (imageSize(s_target).y-1.0-SVPosRounded.y) * 2 + SVPosRounded.x ) % 5;
#else
uint pseudoRandomIndex = uint( SVPosRounded.y * 2 + SVPosRounded.x ) % 5;
#endif
vec4 rs = u_patternRotScaleMatrices( pseudoRandomIndex );
rotScale = mat2( rs.x * pixLookupRadiusMod, rs.y * pixLookupRadiusMod, rs.z * pixLookupRadiusMod, rs.w * pixLookupRadiusMod );
}
// the main obscurance & sample weight storage
float obscuranceSum = 0.0;
float weightSum = 0.0;
// edge mask for between this and left/right/top/bottom neighbour pixels - not used in quality level 0 so initialize to "no edge" (1 is no edge, 0 is edge)
vec4 edgesLRTB = vec4( 1.0, 1.0, 1.0, 1.0 );
// Move center pixel slightly towards camera to avoid imprecision artifacts due to using a 16-bit depth buffer; much smaller offsets are needed when using 32-bit floats
pixCenterPos *= u_depthPrecisionOffsetMod;
if( !adaptiveBase && (qualityLevel >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET) )
{
edgesLRTB = CalculateEdges( pixZ, pixLZ, pixRZ, pixTZ, pixBZ );
}
// adds a more high definition sharp effect, which gets blurred out (reuses left/right/top/bottom samples that we used for edge detection)
if( !adaptiveBase && (qualityLevel >= SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET) )
{
// disable in case of quality level 4 (reference)
if( qualityLevel != 4 )
{
//approximate neighbouring pixels positions (actually just deltas or "positions - pixCenterPos" )
vec3 viewspaceDirZNormalized = vec3( pixCenterPos.xy / pixCenterPos.zz, 1.0 );
vec3 pixLDelta = vec3( -pixelDirRBViewspaceSizeAtCenterZ.x, 0.0, 0.0 ) + viewspaceDirZNormalized * (pixLZ - pixCenterPos.z); // very close approximation of: vec3 pixLPos = NDCToViewspace( normalizedScreenPos + vec2( -u_halfViewportPixelSize.x, 0.0 ), pixLZ ).xyz - pixCenterPos.xyz;
vec3 pixRDelta = vec3( +pixelDirRBViewspaceSizeAtCenterZ.x, 0.0, 0.0 ) + viewspaceDirZNormalized * (pixRZ - pixCenterPos.z); // very close approximation of: vec3 pixRPos = NDCToViewspace( normalizedScreenPos + vec2( +u_halfViewportPixelSize.x, 0.0 ), pixRZ ).xyz - pixCenterPos.xyz;
#if BGFX_SHADER_LANGUAGE_GLSL
vec3 pixTDelta = vec3( 0.0, +pixelDirRBViewspaceSizeAtCenterZ.y, 0.0 ) + viewspaceDirZNormalized * (pixTZ - pixCenterPos.z); // very close approximation of: vec3 pixTPos = NDCToViewspace( normalizedScreenPos + vec2( 0.0, -u_halfViewportPixelSize.y ), pixTZ ).xyz - pixCenterPos.xyz;
vec3 pixBDelta = vec3( 0.0, -pixelDirRBViewspaceSizeAtCenterZ.y, 0.0 ) + viewspaceDirZNormalized * (pixBZ - pixCenterPos.z); // very close approximation of: vec3 pixBPos = NDCToViewspace( normalizedScreenPos + vec2( 0.0, +u_halfViewportPixelSize.y ), pixBZ ).xyz - pixCenterPos.xyz;
#else
vec3 pixTDelta = vec3( 0.0, -pixelDirRBViewspaceSizeAtCenterZ.y, 0.0 ) + viewspaceDirZNormalized * (pixTZ - pixCenterPos.z); // very close approximation of: vec3 pixTPos = NDCToViewspace( normalizedScreenPos + vec2( 0.0, -u_halfViewportPixelSize.y ), pixTZ ).xyz - pixCenterPos.xyz;
vec3 pixBDelta = vec3( 0.0, +pixelDirRBViewspaceSizeAtCenterZ.y, 0.0 ) + viewspaceDirZNormalized * (pixBZ - pixCenterPos.z); // very close approximation of: vec3 pixBPos = NDCToViewspace( normalizedScreenPos + vec2( 0.0, +u_halfViewportPixelSize.y ), pixBZ ).xyz - pixCenterPos.xyz;
#endif
const float rangeReductionConst = 4.0f; // this is to avoid various artifacts
const float modifiedFalloffCalcMulSq = rangeReductionConst * falloffCalcMulSq;
vec4 additionalObscurance;
additionalObscurance.x = CalculatePixelObscurance( pixelNormal, pixLDelta, modifiedFalloffCalcMulSq );
additionalObscurance.y = CalculatePixelObscurance( pixelNormal, pixRDelta, modifiedFalloffCalcMulSq );
additionalObscurance.z = CalculatePixelObscurance( pixelNormal, pixTDelta, modifiedFalloffCalcMulSq );
additionalObscurance.w = CalculatePixelObscurance( pixelNormal, pixBDelta, modifiedFalloffCalcMulSq );
obscuranceSum += u_detailAOStrength * dot( additionalObscurance, edgesLRTB );
}
}
// Sharp normals also create edges - but this adds to the cost as well
if( !adaptiveBase && (qualityLevel >= SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET ) )
{
vec3 neighbourNormalL = LoadNormal( ivec2(fullResCoord), ivec2( -2, 0 ) );
vec3 neighbourNormalR = LoadNormal( ivec2(fullResCoord), ivec2( 2, 0 ) );
#if BGFX_SHADER_LANGUAGE_GLSL
vec3 neighbourNormalT = LoadNormal( ivec2(fullResCoord), ivec2( 0, 2 ) );
vec3 neighbourNormalB = LoadNormal( ivec2(fullResCoord), ivec2( 0, -2 ) );
#else
vec3 neighbourNormalT = LoadNormal( ivec2(fullResCoord), ivec2( 0, -2 ) );
vec3 neighbourNormalB = LoadNormal( ivec2(fullResCoord), ivec2( 0, 2 ) );
#endif
const float dotThreshold = SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD;
vec4 normalEdgesLRTB;
normalEdgesLRTB.x = saturate( (dot( pixelNormal, neighbourNormalL ) + dotThreshold ) );
normalEdgesLRTB.y = saturate( (dot( pixelNormal, neighbourNormalR ) + dotThreshold ) );
normalEdgesLRTB.z = saturate( (dot( pixelNormal, neighbourNormalT ) + dotThreshold ) );
normalEdgesLRTB.w = saturate( (dot( pixelNormal, neighbourNormalB ) + dotThreshold ) );
//#define SSAO_SMOOTHEN_NORMALS // fixes some aliasing artifacts but kills a lot of high detail and adds to the cost - not worth it probably but feel free to play with it
#ifdef SSAO_SMOOTHEN_NORMALS
//neighbourNormalL = LoadNormal( fullResCoord, ivec2( -1, 0 ) );
//neighbourNormalR = LoadNormal( fullResCoord, ivec2( 1, 0 ) );
//neighbourNormalT = LoadNormal( fullResCoord, ivec2( 0, -1 ) );
//neighbourNormalB = LoadNormal( fullResCoord, ivec2( 0, 1 ) );
pixelNormal += neighbourNormalL * edgesLRTB.x + neighbourNormalR * edgesLRTB.y + neighbourNormalT * edgesLRTB.z + neighbourNormalB * edgesLRTB.w;
pixelNormal = normalize( pixelNormal );
#endif
edgesLRTB *= normalEdgesLRTB;
}
const float globalMipOffset = SSAO_DEPTH_MIPS_GLOBAL_OFFSET;
float mipOffset = ( qualityLevel < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET ) ? ( 0 ) : ( log2( pixLookupRadiusMod ) + globalMipOffset );
// Used to tilt the second set of samples so that the disk is effectively rotated by the normal
// effective at removing one set of artifacts, but too expensive for lower quality settings
vec2 normXY = vec2( pixelNormal.x, pixelNormal.y );
float normXYLength = length( normXY );
normXY /= vec2( normXYLength, -normXYLength );
normXYLength *= SSAO_TILT_SAMPLES_AMOUNT;
const vec3 negViewspaceDir = -normalize( pixCenterPos );
// standard, non-adaptive approach
if( (qualityLevel != 3) || adaptiveBase )
{
// [unroll] // <- doesn't seem to help on any platform, although the compilers seem to unroll anyway if const number of tap used!
for( uint i = 0; i < numberOfTaps; i++ )
{
SSAOTap( qualityLevel, obscuranceSum, weightSum, int(i), rotScale, pixCenterPos, negViewspaceDir, pixelNormal, normalizedScreenPos, mipOffset, falloffCalcMulSq, 1.0, normXY, normXYLength);
}
}
else // if( qualityLevel == 3 ) adaptive approach
{
// add new ones if needed
vec2 fullResUV = normalizedScreenPos + u_perPassFullResUVOffset.xy;
float importance = texture2DLod(s_importanceMap, fullResUV, 0.0 ).x;
// this is to normalize SSAO_DETAIL_AO_AMOUNT across all pixels regardless of importance
obscuranceSum *= (SSAO_ADAPTIVE_TAP_BASE_COUNT / float(SSAO_MAX_TAPS)) + (importance * SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT / float(SSAO_MAX_TAPS));
// load existing base values
vec2 baseValues = imageLoad(s_baseSSAO, ivec3( SVPosui, u_passIndex ) ).xy;
weightSum += baseValues.y * (float(SSAO_ADAPTIVE_TAP_BASE_COUNT) * 4.0);
obscuranceSum += (baseValues.x) * weightSum;
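// baseValues.x is the obscurance term from the adaptive base pass and baseValues.y its weight sum normalized by (SSAO_ADAPTIVE_TAP_BASE_COUNT * 4.0) (see the ASSAO_ADAPTIVE_BASE output in main() below); the two lines above reconstruct the base pass accumulators before the extra adaptive taps are added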
// increase importance around edges
float edgeCount = dot( 1.0-edgesLRTB, vec4( 1.0, 1.0, 1.0, 1.0 ) );
//importance += edgeCount / (float)SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT;
float avgTotalImportance = float(imageLoad(s_loadCounter,ivec2(0,0)).x) * u_loadCounterAvgDiv;
float importanceLimiter = saturate( u_adaptiveSampleCountLimit / avgTotalImportance );
importance *= importanceLimiter;
float additionalSampleCountFlt = SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT * importance;
const float blendRange = 3.0; // use 1 to just blend the last one; use a larger number to blend over more samples for a smoother transition
const float blendRangeInv = 1.0 / blendRange;
additionalSampleCountFlt += 0.5;
uint additionalSamples = uint( additionalSampleCountFlt );
uint additionalSamplesTo = min( SSAO_MAX_TAPS, additionalSamples + SSAO_ADAPTIVE_TAP_BASE_COUNT );
// additional manual unroll doesn't help unfortunately
LOOP
for( uint i = SSAO_ADAPTIVE_TAP_BASE_COUNT; i < additionalSamplesTo; i++ )
{
additionalSampleCountFlt -= 1.0f;
float weightMod = saturate(additionalSampleCountFlt * blendRangeInv); // slowly blend in the last few samples
SSAOTap( qualityLevel, obscuranceSum, weightSum, int(i), rotScale, pixCenterPos, negViewspaceDir, pixelNormal, normalizedScreenPos, mipOffset, falloffCalcMulSq, weightMod, normXY, normXYLength);
}
}
// early out for adaptive base - just output weight (used for the next pass)
if( adaptiveBase )
{
float obscurance = obscuranceSum / weightSum;
outShadowTerm = obscurance;
outEdges = vec4(0,0,0,0);
outWeight = weightSum;
return;
}
// calculate weighted average
float obscurance = obscuranceSum / weightSum;
// calculate fadeout (1 close, gradient, 0 far)
float fadeOut = saturate( pixCenterPos.z * u_effectFadeOutMul + u_effectFadeOutAdd );
// Reduce the SSAO shadowing if we're on the edge to remove artifacts on edges (we don't care for the lower quality one)
if( !adaptiveBase && (qualityLevel >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET) )
{
// float edgeCount = dot( 1.0-edgesLRTB, vec4( 1.0, 1.0, 1.0, 1.0 ) );
// when there are more than 2 opposite edges, start fading out the occlusion to reduce aliasing artifacts
float edgeFadeoutFactor = saturate( (1.0 - edgesLRTB.x - edgesLRTB.y) * 0.35) + saturate( (1.0 - edgesLRTB.z - edgesLRTB.w) * 0.35 );
// (experimental) if you want to reduce the effect next to any edge
// edgeFadeoutFactor += 0.1 * saturate( dot( 1 - edgesLRTB, vec4( 1, 1, 1, 1 ) ) );
fadeOut *= saturate( 1.0 - edgeFadeoutFactor );
}
// same as above, but a much more conservative version
// fadeOut *= saturate( dot( edgesLRTB, vec4( 0.9, 0.9, 0.9, 0.9 ) ) - 2.6 );
// strength
obscurance = u_effectShadowStrength * obscurance;
// clamp
obscurance = min( obscurance, u_effectShadowClamp );
// fadeout
obscurance *= fadeOut;
// conceptually switch from obscurance to an 'occlusion' term that actually encodes visibility (it grows with visibility; a value of 1 means fully visible),
// to be in line with what is more commonly used.
float occlusion = 1.0 - obscurance;
// modify the gradient
// note: this cannot be moved to a later pass because of loss of precision after storing in the render target
occlusion = pow( saturate( occlusion ), u_effectShadowPow );
// outputs!
outShadowTerm = occlusion; // Our final 'occlusion' term (0 means fully occluded, 1 means fully lit)
outEdges = edgesLRTB; // These are used to prevent blurring across edges, 1 means no edge, 0 means edge, 0.5 means half way there, etc.
outWeight = weightSum;
}
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
if (all(lessThan(dtID.xy, u_rect.zw) ) )
{
float outShadowTerm;
float outWeight;
vec4 outEdges;
GenerateSSAOShadowsInternal( outShadowTerm, outEdges, outWeight, vec2(dtID.xy), ASSAO_QUALITY, ASSAO_ADAPTIVE_BASE);
vec2 out0;
out0.x = outShadowTerm;
if ( ASSAO_ADAPTIVE_BASE )
{
out0.y = outWeight / (float(SSAO_ADAPTIVE_TAP_BASE_COUNT) * 4.0); //0.0; //frac(outWeight / 6.0);// / (float)(SSAO_MAX_TAPS * 4.0);
}
else
{
if (ASSAO_QUALITY == 0)
out0.y = PackEdges( vec4( 1, 1, 1, 1 ) ); // no edges in low quality
else
out0.y = PackEdges( outEdges );
}
imageStore(s_target, ivec3(dtID.xy, u_layer), out0.xyyy);
}
}

@@ -0,0 +1,9 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#define ASSAO_QUALITY 0
#define ASSAO_ADAPTIVE_BASE false
#include "cs_assao_generate_q.sh"

@@ -0,0 +1,9 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#define ASSAO_QUALITY 1
#define ASSAO_ADAPTIVE_BASE false
#include "cs_assao_generate_q.sh"

@@ -0,0 +1,9 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#define ASSAO_QUALITY 2
#define ASSAO_ADAPTIVE_BASE false
#include "cs_assao_generate_q.sh"

@@ -0,0 +1,9 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#define ASSAO_QUALITY 3
#define ASSAO_ADAPTIVE_BASE false
#include "cs_assao_generate_q.sh"

@@ -0,0 +1,9 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#define ASSAO_QUALITY 3
#define ASSAO_ADAPTIVE_BASE true
#include "cs_assao_generate_q.sh"

@@ -0,0 +1,15 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
UIMAGE2D_WR(s_loadCounterOutputUAV, r32ui, 0);
NUM_THREADS(1, 1, 1)
void main()
{
imageStore(s_loadCounterOutputUAV, ivec2(0, 0), uvec4(0,0,0,0));
}

@@ -0,0 +1,29 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
IMAGE2D_WR(s_target, r8, 0);
SAMPLER2DARRAY(s_finalSSAO, 1);
// edge-ignorant blur & apply (for the lowest quality level 0)
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
if (all(lessThan(dtID.xy, u_rect.zw) ) )
{
vec2 inUV = (dtID.xy+vec2(0.5,0.5)) * u_viewportPixelSize;
float a = texture2DArrayLod(s_finalSSAO, vec3( inUV.xy, 0 ), 0.0 ).x;
float b = texture2DArrayLod(s_finalSSAO, vec3( inUV.xy, 1 ), 0.0 ).x;
float c = texture2DArrayLod(s_finalSSAO, vec3( inUV.xy, 2 ), 0.0 ).x;
float d = texture2DArrayLod(s_finalSSAO, vec3( inUV.xy, 3 ), 0.0 ).x;
float avg = (a+b+c+d) * 0.25;
avg = pow(avg,1.0/2.2);
imageStore(s_target, ivec2(dtID.xy), avg.xxxx);
}
}

@@ -0,0 +1,37 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
IMAGE2D_ARRAY_WR(s_target, rg8, 0);
SAMPLER2DARRAY(s_blurInput, 1);
// edge-ignorant blur in x and y directions, 9 pixels touched (for the lowest quality level 0)
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
if (all(lessThan(dtID.xy, u_rect.zw) ) )
{
vec2 inUV = (dtID.xy+vec2(0.5,0.5)) * u_halfViewportPixelSize;
vec2 halfPixel = u_halfViewportPixelSize * 0.5f;
#if BGFX_SHADER_LANGUAGE_GLSL
halfPixel.y = -halfPixel.y;
#endif
vec2 centre = texture2DArrayLod(s_blurInput, vec3(inUV, 0.0), 0.0 ).xy;
vec4 vals;
vals.x = texture2DArrayLod(s_blurInput, vec3(inUV + vec2( -halfPixel.x * 3, -halfPixel.y ),0.0) , 0.0 ).x;
vals.y = texture2DArrayLod(s_blurInput, vec3(inUV + vec2( +halfPixel.x, -halfPixel.y * 3 ),0.0) , 0.0 ).x;
vals.z = texture2DArrayLod(s_blurInput, vec3(inUV + vec2( -halfPixel.x, +halfPixel.y * 3 ),0.0) , 0.0 ).x;
vals.w = texture2DArrayLod(s_blurInput, vec3(inUV + vec2( +halfPixel.x * 3, +halfPixel.y ),0.0) , 0.0 ).x;
imageStore(s_target, ivec3(dtID.xy,u_layer), vec4(dot( vals, 0.2.xxxx ) + centre.x * 0.2, centre.y, 0.0, 0.0));
}
}

@@ -0,0 +1,26 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
IMAGE2D_WR(s_target, r8, 0);
SAMPLER2DARRAY(s_finalSSAO, 1);
// edge-ignorant blur & apply, skipping half pixels in checkerboard pattern (for the Lowest quality level 0 and Settings::SkipHalfPixelsOnLowQualityLevel == true )
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
if (all(lessThan(dtID.xy, u_rect.zw) ) )
{
vec2 inUV = (dtID.xy+vec2(0.5,0.5)) * u_viewportPixelSize;
float a = texture2DArrayLod(s_finalSSAO, vec3( inUV.xy, 0 ), 0.0 ).x;
float d = texture2DArrayLod(s_finalSSAO, vec3( inUV.xy, 3 ), 0.0 ).x;
float avg = (a+d) * 0.5;
avg = pow(avg,1.0/2.2);
imageStore(s_target, ivec2(dtID.xy), avg.xxxx);
}
}

@@ -0,0 +1,47 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
IMAGE2D_WR(s_target, r8, 0);
SAMPLER2D(s_importanceMap, 1);
// Shaders below are only needed for the adaptive quality level
CONST(float cSmoothenImportance) = 1.0;
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
uvec2 dim = imageSize(s_target).xy;
if (all(lessThan(dtID.xy, dim) ) )
{
uvec2 pos = uvec2(dtID.xy);
vec2 inUV = (dtID.xy+vec2(0.5,0.5)) * u_quarterResPixelSize;
float centre = texture2DLod(s_importanceMap, inUV, 0.0 ).x;
//return centre;
vec2 halfPixel = u_quarterResPixelSize * 0.5f;
#if BGFX_SHADER_LANGUAGE_GLSL
halfPixel.y = -halfPixel.y;
#endif
vec4 vals;
vals.x = texture2DLod(s_importanceMap, inUV + vec2( -halfPixel.x * 3, -halfPixel.y ), 0.0 ).x;
vals.y = texture2DLod(s_importanceMap, inUV + vec2( +halfPixel.x, -halfPixel.y * 3 ), 0.0 ).x;
vals.z = texture2DLod(s_importanceMap, inUV + vec2( +halfPixel.x * 3, +halfPixel.y ), 0.0 ).x;
vals.w = texture2DLod(s_importanceMap, inUV + vec2( -halfPixel.x, +halfPixel.y * 3 ), 0.0 ).x;
float avgVal = dot( vals, vec4( 0.25, 0.25, 0.25, 0.25 ) );
vals.xy = max( vals.xy, vals.zw );
float maxVal = max( centre, max( vals.x, vals.y ) );
imageStore(s_target, ivec2(dtID.xy), mix( maxVal, avgVal, cSmoothenImportance ).xxxx);
}
}

@@ -0,0 +1,55 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
IMAGE2D_WR(s_target, r8, 0);
SAMPLER2D(s_importanceMap, 1);
UIMAGE2D_RW(s_loadCounterOutputUAV, r32ui, 2);
CONST(float cSmoothenImportance) = 1.0;
// Shaders below are only needed for the adaptive quality level
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
uvec2 dim = imageSize(s_target).xy;
if (all(lessThan(dtID.xy, dim) ) )
{
vec2 inUV = (dtID.xy+vec2(0.5,0.5)) * u_quarterResPixelSize;
float centre = texture2DLod(s_importanceMap, inUV, 0.0 ).x;
//return centre;
vec2 halfPixel = u_quarterResPixelSize * 0.5f;
vec4 vals;
vals.x = texture2DLod(s_importanceMap, inUV + vec2( -halfPixel.x, -halfPixel.y * 3 ), 0.0 ).x;
vals.y = texture2DLod(s_importanceMap, inUV + vec2( +halfPixel.x * 3, -halfPixel.y ), 0.0 ).x;
vals.z = texture2DLod(s_importanceMap, inUV + vec2( +halfPixel.x, +halfPixel.y * 3 ), 0.0 ).x;
vals.w = texture2DLod(s_importanceMap, inUV + vec2( -halfPixel.x * 3, +halfPixel.y ), 0.0 ).x;
float avgVal = dot( vals, vec4( 0.25, 0.25, 0.25, 0.25 ) );
vals.xy = max( vals.xy, vals.zw );
float maxVal = max( centre, max( vals.x, vals.y ) );
float retVal = mix( maxVal, avgVal, cSmoothenImportance );
// sum the average; to avoid overflowing we assume max AO resolution is not bigger than 16384x16384; so quarter res (used here) will be 4096x4096, which leaves us with 8 bits per pixel
uint sum = uint(saturate(retVal) * 255.0 + 0.5);
// save every 9th to avoid InterlockedAdd congestion - since we're blurring, this is good enough; compensated by multiplying LoadCounterAvgDiv by 9
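// ((x % 3) + (y % 3)) == 0 only holds when both terms are 0, i.e. exactly one pixel per 3x3 block performs the atomic add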
#if BGFX_SHADER_LANGUAGE_GLSL
if( ((dtID.x % 3) + ((dim.y-1-dtID.y) % 3)) == 0 )
#else
if( ((dtID.x % 3) + (dtID.y % 3)) == 0 )
#endif
imageAtomicAdd(s_loadCounterOutputUAV, ivec2(0, 0), sum );
imageStore(s_target, ivec2(dtID.xy), retVal.xxxx);
}
}

@@ -0,0 +1,103 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
IMAGE2D_RO(s_viewspaceDepthSource0, r16f, 0);
IMAGE2D_RO(s_viewspaceDepthSource1, r16f, 1);
IMAGE2D_RO(s_viewspaceDepthSource2, r16f, 2);
IMAGE2D_RO(s_viewspaceDepthSource3, r16f, 3);
IMAGE2D_WR(s_target0, r16f, 4);
IMAGE2D_WR(s_target1, r16f, 5);
IMAGE2D_WR(s_target2, r16f, 6);
IMAGE2D_WR(s_target3, r16f, 7);
// calculate effect radius and fit our screen sampling pattern inside it
void CalculateRadiusParameters( const float pixCenterLength, const vec2 pixelDirRBViewspaceSizeAtCenterZ, out float pixLookupRadiusMod, out float effectRadius, out float falloffCalcMulSq )
{
effectRadius = u_effectRadius;
// leaving this out for performance reasons: use something similar if radius needs to scale based on distance
//effectRadius *= pow( pixCenterLength, u_radiusDistanceScalingFunctionPow);
// when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts
const float tooCloseLimitMod = saturate( pixCenterLength * u_effectSamplingRadiusNearLimitRec ) * 0.8 + 0.2;
effectRadius *= tooCloseLimitMod;
// 0.85 is to reduce the radius to allow for more samples on a slope to still stay within influence
pixLookupRadiusMod = (0.85 * effectRadius) / pixelDirRBViewspaceSizeAtCenterZ.x;
// used to calculate falloff (both for AO samples and per-sample weights)
falloffCalcMulSq= -1.0f / (effectRadius*effectRadius);
}
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
uvec2 dim = uvec2(u_rect.zw);
if (all(lessThan(dtID.xy, dim) ) )
{
ivec2 baseCoords = ivec2(dtID.xy) * 2;
vec4 depthsArr[4];
float depthsOutArr[4];
// how to Gather a specific mip level?
depthsArr[0].x = imageLoad(s_viewspaceDepthSource0, baseCoords + ivec2( 0, 0 )).x ;
depthsArr[0].y = imageLoad(s_viewspaceDepthSource0, baseCoords + ivec2( 1, 0 )).x ;
depthsArr[0].z = imageLoad(s_viewspaceDepthSource0, baseCoords + ivec2( 0, 1 )).x ;
depthsArr[0].w = imageLoad(s_viewspaceDepthSource0, baseCoords + ivec2( 1, 1 )).x ;
depthsArr[1].x = imageLoad(s_viewspaceDepthSource1, baseCoords + ivec2( 0, 0 )).x;
depthsArr[1].y = imageLoad(s_viewspaceDepthSource1, baseCoords + ivec2( 1, 0 )).x;
depthsArr[1].z = imageLoad(s_viewspaceDepthSource1, baseCoords + ivec2( 0, 1 )).x;
depthsArr[1].w = imageLoad(s_viewspaceDepthSource1, baseCoords + ivec2( 1, 1 )).x;
depthsArr[2].x = imageLoad(s_viewspaceDepthSource2, baseCoords + ivec2( 0, 0 )).x;
depthsArr[2].y = imageLoad(s_viewspaceDepthSource2, baseCoords + ivec2( 1, 0 )).x;
depthsArr[2].z = imageLoad(s_viewspaceDepthSource2, baseCoords + ivec2( 0, 1 )).x;
depthsArr[2].w = imageLoad(s_viewspaceDepthSource2, baseCoords + ivec2( 1, 1 )).x;
depthsArr[3].x = imageLoad(s_viewspaceDepthSource3, baseCoords + ivec2( 0, 0 )).x;
depthsArr[3].y = imageLoad(s_viewspaceDepthSource3, baseCoords + ivec2( 1, 0 )).x;
depthsArr[3].z = imageLoad(s_viewspaceDepthSource3, baseCoords + ivec2( 0, 1 )).x;
depthsArr[3].w = imageLoad(s_viewspaceDepthSource3, baseCoords + ivec2( 1, 1 )).x;
const uvec2 SVPosui = uvec2( dtID.xy );
const uint pseudoRandomA = (SVPosui.x ) + 2 * (SVPosui.y );
float dummyUnused1;
float dummyUnused2;
float falloffCalcMulSq, falloffCalcAdd;
UNROLL
for( int i = 0; i < 4; i++ )
{
vec4 depths = depthsArr[i];
float closest = min( min( depths.x, depths.y ), min( depths.z, depths.w ) );
CalculateRadiusParameters( abs( closest ), vec2(1.0,1.0), dummyUnused1, dummyUnused2, falloffCalcMulSq );
vec4 dists = depths - closest.xxxx;
vec4 weights = saturate( dists * dists * falloffCalcMulSq + 1.0 );
float smartAvg = dot( weights, depths ) / dot( weights, vec4( 1.0, 1.0, 1.0, 1.0 ) );
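// depth-aware average: taps within the effect radius of the closest depth keep most of their weight while farther taps fall to ~0, so silhouettes aren't averaged away when downsampling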
const uint pseudoRandomIndex = ( pseudoRandomA + i ) % 4;
//depthsOutArr[i] = closest;
//depthsOutArr[i] = depths[ pseudoRandomIndex ];
depthsOutArr[i] = smartAvg;
}
imageStore(s_target0, ivec2(dtID.xy), depthsOutArr[0].xxxx);
imageStore(s_target1, ivec2(dtID.xy), depthsOutArr[1].xxxx);
imageStore(s_target2, ivec2(dtID.xy), depthsOutArr[2].xxxx);
imageStore(s_target3, ivec2(dtID.xy), depthsOutArr[3].xxxx);
}
}

@@ -0,0 +1,58 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
SAMPLER2D(s_depthSource, 0);
IMAGE2D_WR(s_target0, r16f, 1);
IMAGE2D_WR(s_target1, r16f, 2);
IMAGE2D_WR(s_target2, r16f, 3);
IMAGE2D_WR(s_target3, r16f, 4);
float ScreenSpaceToViewSpaceDepth( float screenDepth )
{
float depthLinearizeMul = u_depthUnpackConsts.x;
float depthLinearizeAdd = u_depthUnpackConsts.y;
// Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar"
// Set your depthLinearizeMul and depthLinearizeAdd to:
// depthLinearizeMul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear );
// depthLinearizeAdd = cameraClipFar / ( cameraClipFar - cameraClipNear );
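// sanity check: with the constants set as above (standard, non-reversed projection), screenDepth == 0 yields cameraClipNear and screenDepth == 1 yields cameraClipFar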
return depthLinearizeMul / ( depthLinearizeAdd - screenDepth );
}
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
uvec2 dim = imageSize(s_target0).xy;
if (all(lessThan(dtID.xy, dim) ) )
{
ivec2 baseCoord = ivec2(dtID.xy) * 2;
#if BGFX_SHADER_LANGUAGE_GLSL
float a = texelFetch(s_depthSource, baseCoord + ivec2( 0, 1 ), 0).x;
float b = texelFetch(s_depthSource, baseCoord + ivec2( 1, 1 ), 0).x;
float c = texelFetch(s_depthSource, baseCoord + ivec2( 0, 0 ), 0).x;
float d = texelFetch(s_depthSource, baseCoord + ivec2( 1, 0 ), 0).x;
#else
float a = texelFetch(s_depthSource, baseCoord + ivec2( 0, 0 ), 0).x;
float b = texelFetch(s_depthSource, baseCoord + ivec2( 1, 0 ), 0).x;
float c = texelFetch(s_depthSource, baseCoord + ivec2( 0, 1 ), 0).x;
float d = texelFetch(s_depthSource, baseCoord + ivec2( 1, 1 ), 0).x;
#endif
imageStore(s_target0, ivec2(dtID.xy), ScreenSpaceToViewSpaceDepth( a ).xxxx);
imageStore(s_target1, ivec2(dtID.xy), ScreenSpaceToViewSpaceDepth( b ).xxxx);
imageStore(s_target2, ivec2(dtID.xy), ScreenSpaceToViewSpaceDepth( c ).xxxx);
imageStore(s_target3, ivec2(dtID.xy), ScreenSpaceToViewSpaceDepth( d ).xxxx);
}
}

@@ -0,0 +1,192 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
SAMPLER2D(s_depthSource, 0);
IMAGE2D_WR(s_target0, r16f, 1);
IMAGE2D_WR(s_target1, r16f, 2);
IMAGE2D_WR(s_target2, r16f, 3);
IMAGE2D_WR(s_target3, r16f, 4);
IMAGE2D_WR(s_normalsOutputUAV, rgba8, 5);
float ScreenSpaceToViewSpaceDepth( float screenDepth )
{
float depthLinearizeMul = u_depthUnpackConsts.x;
float depthLinearizeAdd = u_depthUnpackConsts.y;
// Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar"
// Set your depthLinearizeMul and depthLinearizeAdd to:
// depthLinearizeMul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear );
// depthLinearizeAdd = cameraClipFar / ( cameraClipFar - cameraClipNear );
return depthLinearizeMul / ( depthLinearizeAdd - screenDepth );
}
vec3 NDCToViewspace( vec2 pos, float viewspaceDepth )
{
vec3 ret;
ret.xy = (u_ndcToViewMul * pos.xy + u_ndcToViewAdd) * viewspaceDepth;
ret.z = viewspaceDepth;
return ret;
}
vec4 CalculateEdges( const float centerZ, const float leftZ, const float rightZ, const float topZ, const float bottomZ )
{
// slope-sensitive depth-based edge detection
vec4 edgesLRTB = vec4( leftZ, rightZ, topZ, bottomZ ) - centerZ;
vec4 edgesLRTBSlopeAdjusted = edgesLRTB + edgesLRTB.yxwz;
edgesLRTB = min( abs( edgesLRTB ), abs( edgesLRTBSlopeAdjusted ) );
return saturate( ( 1.3 - edgesLRTB / (centerZ * 0.040) ) );
// cheaper version but has artifacts
// edgesLRTB = abs( vec4( leftZ, rightZ, topZ, bottomZ ) - centerZ );
// return saturate( ( 1.3 - edgesLRTB / (pixZ * 0.06 + 0.1) ) );
}
vec3 CalculateNormal( const vec4 edgesLRTB, vec3 pixCenterPos, vec3 pixLPos, vec3 pixRPos, vec3 pixTPos, vec3 pixBPos )
{
// Get this pixel's viewspace normal
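// each quadrant contributes the cross product of its two neighbour directions; acceptedNormals multiplies adjacent edge weights so quadrants straddling a depth discontinuity are rejected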
vec4 acceptedNormals = vec4( edgesLRTB.x*edgesLRTB.z, edgesLRTB.z*edgesLRTB.y, edgesLRTB.y*edgesLRTB.w, edgesLRTB.w*edgesLRTB.x );
pixLPos = normalize(pixLPos - pixCenterPos);
pixRPos = normalize(pixRPos - pixCenterPos);
pixTPos = normalize(pixTPos - pixCenterPos);
pixBPos = normalize(pixBPos - pixCenterPos);
vec3 pixelNormal = vec3( 0, 0, -0.0005 );
pixelNormal += ( acceptedNormals.x ) * cross( pixLPos, pixTPos );
pixelNormal += ( acceptedNormals.y ) * cross( pixTPos, pixRPos );
pixelNormal += ( acceptedNormals.z ) * cross( pixRPos, pixBPos );
pixelNormal += ( acceptedNormals.w ) * cross( pixBPos, pixLPos );
pixelNormal = normalize( pixelNormal );
return pixelNormal;
}
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
uvec2 dim = imageSize(s_target0).xy;
if (all(lessThan(dtID.xy, dim) ) )
{
ivec2 baseCoords = ivec2(dtID.xy) * 2;
#if BGFX_SHADER_LANGUAGE_GLSL
vec2 upperLeftUV = (vec2(dtID.xy) + vec2(0.25,0.75)) * u_viewport2xPixelSize;
#else
vec2 upperLeftUV = (vec2(dtID.xy) + vec2(0.25,0.25)) * u_viewport2xPixelSize;
#endif
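// upperLeftUV is the centre of the top-left full-res pixel of this 2x2 quad (0.25 of a 2x pixel = half a full-res pixel); the 0.75 in the GLSL path presumably compensates for the flipped vertical axis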
ivec2 baseCoord = ivec2(dtID.xy) * 2;
#if BGFX_SHADER_LANGUAGE_GLSL
float z0 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 0, 1 ) ).x );
float z1 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 1, 1 ) ).x );
float z2 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 0, 0 ) ).x );
float z3 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 1, 0 ) ).x );
#else
float z0 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 0, 0 ) ).x );
float z1 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 1, 0 ) ).x );
float z2 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 0, 1 ) ).x );
float z3 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 1, 1 ) ).x );
#endif
imageStore(s_target0, ivec2(dtID.xy), z0.xxxx );
imageStore(s_target1, ivec2(dtID.xy), z1.xxxx );
imageStore(s_target2, ivec2(dtID.xy), z2.xxxx );
imageStore(s_target3, ivec2(dtID.xy), z3.xxxx );
float pixZs[4][4];
// middle 4
pixZs[1][1] = z0;
pixZs[2][1] = z1;
pixZs[1][2] = z2;
pixZs[2][2] = z3;
#if BGFX_SHADER_LANGUAGE_GLSL
// left 2
pixZs[0][1] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( -1, 0 ) ).x );
pixZs[0][2] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( -1, -1 ) ).x );
// right 2
pixZs[3][1] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 2, 0 ) ).x );
pixZs[3][2] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 2, -1 ) ).x );
// top 2
pixZs[1][0] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 0, 1 ) ).x );
pixZs[2][0] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 1, 1 ) ).x );
// bottom 2
pixZs[1][3] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 0, -2 ) ).x );
pixZs[2][3] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 1, -2 ) ).x );
#else
// left 2
pixZs[0][1] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( -1, 0 ) ).x );
pixZs[0][2] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( -1, 1 ) ).x );
// right 2
pixZs[3][1] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 2, 0 ) ).x );
pixZs[3][2] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 2, 1 ) ).x );
// top 2
pixZs[1][0] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 0, -1 ) ).x );
pixZs[2][0] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 1, -1 ) ).x );
// bottom 2
pixZs[1][3] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 0, 2 ) ).x );
pixZs[2][3] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 1, 2 ) ).x );
#endif
vec4 edges0 = CalculateEdges( pixZs[1][1], pixZs[0][1], pixZs[2][1], pixZs[1][0], pixZs[1][2] );
vec4 edges1 = CalculateEdges( pixZs[2][1], pixZs[1][1], pixZs[3][1], pixZs[2][0], pixZs[2][2] );
vec4 edges2 = CalculateEdges( pixZs[1][2], pixZs[0][2], pixZs[2][2], pixZs[1][1], pixZs[1][3] );
vec4 edges3 = CalculateEdges( pixZs[2][2], pixZs[1][2], pixZs[3][2], pixZs[2][1], pixZs[2][3] );
vec2 viewportPixelSize = u_viewportPixelSize;
#if BGFX_SHADER_LANGUAGE_GLSL
viewportPixelSize.y = -viewportPixelSize.y;
#endif
vec3 pixPos[4][4];
// middle 4
pixPos[1][1] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 0.0, 0.0 ), pixZs[1][1] );
pixPos[2][1] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 1.0, 0.0 ), pixZs[2][1] );
pixPos[1][2] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 0.0, 1.0 ), pixZs[1][2] );
pixPos[2][2] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 1.0, 1.0 ), pixZs[2][2] );
// left 2
pixPos[0][1] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( -1.0, 0.0), pixZs[0][1] );
pixPos[0][2] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( -1.0, 1.0), pixZs[0][2] );
// right 2
pixPos[3][1] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 2.0, 0.0), pixZs[3][1] );
pixPos[3][2] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 2.0, 1.0), pixZs[3][2] );
// top 2
pixPos[1][0] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 0.0, -1.0 ), pixZs[1][0] );
pixPos[2][0] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 1.0, -1.0 ), pixZs[2][0] );
// bottom 2
pixPos[1][3] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 0.0, 2.0 ), pixZs[1][3] );
pixPos[2][3] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 1.0, 2.0 ), pixZs[2][3] );
vec3 norm0 = CalculateNormal( edges0, pixPos[1][1], pixPos[0][1], pixPos[2][1], pixPos[1][0], pixPos[1][2] );
vec3 norm1 = CalculateNormal( edges1, pixPos[2][1], pixPos[1][1], pixPos[3][1], pixPos[2][0], pixPos[2][2] );
vec3 norm2 = CalculateNormal( edges2, pixPos[1][2], pixPos[0][2], pixPos[2][2], pixPos[1][1], pixPos[1][3] );
vec3 norm3 = CalculateNormal( edges3, pixPos[2][2], pixPos[1][2], pixPos[3][2], pixPos[2][1], pixPos[2][3] );
#if BGFX_SHADER_LANGUAGE_GLSL
imageStore(s_normalsOutputUAV, baseCoords + ivec2( 0, 1 ), vec4( norm0 * 0.5 + 0.5, 0.0 ));
imageStore(s_normalsOutputUAV, baseCoords + ivec2( 1, 1 ), vec4( norm1 * 0.5 + 0.5, 0.0 ));
imageStore(s_normalsOutputUAV, baseCoords + ivec2( 0, 0 ), vec4( norm2 * 0.5 + 0.5, 0.0 ));
imageStore(s_normalsOutputUAV, baseCoords + ivec2( 1, 0 ), vec4( norm3 * 0.5 + 0.5, 0.0 ));
#else
imageStore(s_normalsOutputUAV, baseCoords + ivec2( 0, 0 ), vec4( norm0 * 0.5 + 0.5, 0.0 ));
imageStore(s_normalsOutputUAV, baseCoords + ivec2( 1, 0 ), vec4( norm1 * 0.5 + 0.5, 0.0 ));
imageStore(s_normalsOutputUAV, baseCoords + ivec2( 0, 1 ), vec4( norm2 * 0.5 + 0.5, 0.0 ));
imageStore(s_normalsOutputUAV, baseCoords + ivec2( 1, 1 ), vec4( norm3 * 0.5 + 0.5, 0.0 ));
#endif
}
}

@@ -0,0 +1,188 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
SAMPLER2D(s_depthSource, 0);
IMAGE2D_WR(s_target0, r16f, 1);
IMAGE2D_WR(s_target1, r16f, 2);
IMAGE2D_WR(s_normalsOutputUAV, rgba8, 5);
float ScreenSpaceToViewSpaceDepth( float screenDepth )
{
float depthLinearizeMul = u_depthUnpackConsts.x;
float depthLinearizeAdd = u_depthUnpackConsts.y;
// Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar"
// Set your depthLinearizeMul and depthLinearizeAdd to:
// depthLinearizeMul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear );
// depthLinearizeAdd = cameraClipFar / ( cameraClipFar - cameraClipNear );
return depthLinearizeMul / ( depthLinearizeAdd - screenDepth );
}
vec3 NDCToViewspace( vec2 pos, float viewspaceDepth )
{
vec3 ret;
ret.xy = (u_ndcToViewMul * pos.xy + u_ndcToViewAdd) * viewspaceDepth;
ret.z = viewspaceDepth;
return ret;
}
vec4 CalculateEdges( const float centerZ, const float leftZ, const float rightZ, const float topZ, const float bottomZ )
{
// slope-sensitive depth-based edge detection
vec4 edgesLRTB = vec4( leftZ, rightZ, topZ, bottomZ ) - centerZ;
vec4 edgesLRTBSlopeAdjusted = edgesLRTB + edgesLRTB.yxwz;
edgesLRTB = min( abs( edgesLRTB ), abs( edgesLRTBSlopeAdjusted ) );
return saturate( ( 1.3 - edgesLRTB / (centerZ * 0.040) ) );
// cheaper version but has artifacts
// edgesLRTB = abs( vec4( leftZ, rightZ, topZ, bottomZ ) - centerZ );
// return saturate( ( 1.3 - edgesLRTB / (pixZ * 0.06 + 0.1) ) );
}
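// Reading of the constants above: a neighbour whose (slope-adjusted) depth differs from centerZ by less than
// 0.3 * 0.040 = ~1.2% of centerZ keeps a weight of 1.0 (no edge), anything beyond 1.3 * 0.040 = ~5.2% clamps
// to 0.0 (full edge), and values in between fall off linearly.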
vec3 CalculateNormal( const vec4 edgesLRTB, vec3 pixCenterPos, vec3 pixLPos, vec3 pixRPos, vec3 pixTPos, vec3 pixBPos )
{
// Get this pixel's viewspace normal
vec4 acceptedNormals = vec4( edgesLRTB.x*edgesLRTB.z, edgesLRTB.z*edgesLRTB.y, edgesLRTB.y*edgesLRTB.w, edgesLRTB.w*edgesLRTB.x );
pixLPos = normalize(pixLPos - pixCenterPos);
pixRPos = normalize(pixRPos - pixCenterPos);
pixTPos = normalize(pixTPos - pixCenterPos);
pixBPos = normalize(pixBPos - pixCenterPos);
vec3 pixelNormal = vec3( 0, 0, -0.0005 );
pixelNormal += ( acceptedNormals.x ) * cross( pixLPos, pixTPos );
pixelNormal += ( acceptedNormals.y ) * cross( pixTPos, pixRPos );
pixelNormal += ( acceptedNormals.z ) * cross( pixRPos, pixBPos );
pixelNormal += ( acceptedNormals.w ) * cross( pixBPos, pixLPos );
pixelNormal = normalize( pixelNormal );
return pixelNormal;
}
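// Each cross product above is the face normal of one of the four triangles sharing the centre sample
// (left-top, top-right, right-bottom, bottom-left); acceptedNormals multiplies the two edge weights bounding
// each triangle, so triangles straddling a depth discontinuity contribute almost nothing. The small initial
// z of -0.0005 keeps normalize() well defined if all four weights happen to be zero.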
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
uvec2 dim = imageSize(s_target0).xy;
if (all(lessThan(dtID.xy, dim) ) )
{
ivec2 baseCoords = ivec2(dtID.xy) * 2;
#if BGFX_SHADER_LANGUAGE_GLSL
vec2 upperLeftUV = (vec2(dtID.xy) + vec2(0.25,0.75)) * u_viewport2xPixelSize;
#else
vec2 upperLeftUV = (vec2(dtID.xy) + vec2(0.25,0.25)) * u_viewport2xPixelSize;
#endif
ivec2 baseCoord = ivec2(dtID.xy) * 2;
#if BGFX_SHADER_LANGUAGE_GLSL
float z0 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 0, 1 ) ).x );
float z1 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 1, 1 ) ).x );
float z2 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 0, 0 ) ).x );
float z3 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 1, 0 ) ).x );
#else
float z0 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 0, 0 ) ).x );
float z1 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 1, 0 ) ).x );
float z2 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 0, 1 ) ).x );
float z3 = ScreenSpaceToViewSpaceDepth( texelFetchOffset(s_depthSource, baseCoord, 0, ivec2( 1, 1 ) ).x );
#endif
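// The BGFX_SHADER_LANGUAGE_GLSL branches throughout this pass flip the vertical offsets because OpenGL's
// texture/image origin is bottom-left, while the D3D-style targets assume top-left; both variants read the
// same 2x2 block of full-resolution depth samples.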
imageStore(s_target0, ivec2(dtID.xy), z0.xxxx );
imageStore(s_target1, ivec2(dtID.xy), z3.xxxx );
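// Only two of the four de-interleaved depth samples (z0 and z3) are written out here, and only the two
// matching normals (norm0, norm3) are generated below; this variant skips the other half of each 2x2 block.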
float pixZs[4][4];
// middle 4
pixZs[1][1] = z0;
pixZs[2][1] = z1;
pixZs[1][2] = z2;
pixZs[2][2] = z3;
#if BGFX_SHADER_LANGUAGE_GLSL
// left 2
pixZs[0][1] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( -1, 0 ) ).x );
pixZs[0][2] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( -1, -1 ) ).x );
// right 2
pixZs[3][1] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 2, 0 ) ).x );
pixZs[3][2] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 2, -1 ) ).x );
// top 2
pixZs[1][0] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 0, 1 ) ).x );
pixZs[2][0] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 1, 1 ) ).x );
// bottom 2
pixZs[1][3] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 0, -2 ) ).x );
pixZs[2][3] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 1, -2 ) ).x );
#else
// left 2
pixZs[0][1] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( -1, 0 ) ).x );
pixZs[0][2] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( -1, 1 ) ).x );
// right 2
pixZs[3][1] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 2, 0 ) ).x );
pixZs[3][2] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 2, 1 ) ).x );
// top 2
pixZs[1][0] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 0, -1 ) ).x );
pixZs[2][0] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 1, -1 ) ).x );
// bottom 2
pixZs[1][3] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 0, 2 ) ).x );
pixZs[2][3] = ScreenSpaceToViewSpaceDepth( texture2DLodOffset(s_depthSource, upperLeftUV, 0.0, ivec2( 1, 2 ) ).x );
#endif
vec4 edges0 = CalculateEdges( pixZs[1][1], pixZs[0][1], pixZs[2][1], pixZs[1][0], pixZs[1][2] );
vec4 edges1 = CalculateEdges( pixZs[2][1], pixZs[1][1], pixZs[3][1], pixZs[2][0], pixZs[2][2] );
vec4 edges2 = CalculateEdges( pixZs[1][2], pixZs[0][2], pixZs[2][2], pixZs[1][1], pixZs[1][3] );
vec4 edges3 = CalculateEdges( pixZs[2][2], pixZs[1][2], pixZs[3][2], pixZs[2][1], pixZs[2][3] );
vec2 viewportPixelSize = u_viewportPixelSize;
#if BGFX_SHADER_LANGUAGE_GLSL
viewportPixelSize.y = -viewportPixelSize.y;
#endif
vec3 pixPos[4][4];
// there is probably a way to optimize the math below, but no approximation will work; it has to be precise.
// middle 4
pixPos[1][1] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 0.0, 0.0 ), pixZs[1][1] );
pixPos[2][1] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 1.0, 0.0 ), pixZs[2][1] );
pixPos[1][2] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 0.0, 1.0 ), pixZs[1][2] );
pixPos[2][2] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 1.0, 1.0 ), pixZs[2][2] );
// left 2
pixPos[0][1] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( -1.0, 0.0), pixZs[0][1] );
//pixPos[0][2] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( -1.0, 1.0), pixZs[0][2] );
// right 2
//pixPos[3][1] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 2.0, 0.0), pixZs[3][1] );
pixPos[3][2] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 2.0, 1.0), pixZs[3][2] );
// top 2
pixPos[1][0] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 0.0, -1.0 ), pixZs[1][0] );
//pixPos[2][0] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 1.0, -1.0 ), pixZs[2][0] );
// bottom 2
//pixPos[1][3] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 0.0, 2.0 ), pixZs[1][3] );
pixPos[2][3] = NDCToViewspace( upperLeftUV + viewportPixelSize * vec2( 1.0, 2.0 ), pixZs[2][3] );
vec3 norm0 = CalculateNormal( edges0, pixPos[1][1], pixPos[0][1], pixPos[2][1], pixPos[1][0], pixPos[1][2] );
vec3 norm3 = CalculateNormal( edges3, pixPos[2][2], pixPos[1][2], pixPos[3][2], pixPos[2][1], pixPos[2][3] );
#if BGFX_SHADER_LANGUAGE_GLSL
imageStore(s_normalsOutputUAV, baseCoords + ivec2( 0, 1 ), vec4( norm0 * 0.5 + 0.5, 0.0 ));
imageStore(s_normalsOutputUAV, baseCoords + ivec2( 1, 0 ), vec4( norm3 * 0.5 + 0.5, 0.0 ));
#else
imageStore(s_normalsOutputUAV, baseCoords + ivec2( 0, 0 ), vec4( norm0 * 0.5 + 0.5, 0.0 ));
imageStore(s_normalsOutputUAV, baseCoords + ivec2( 1, 1 ), vec4( norm3 * 0.5 + 0.5, 0.0 ));
#endif
}
}

View File

@ -0,0 +1,48 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
SAMPLER2D(s_depthSource, 0);
IMAGE2D_WR(s_target0, r16f, 1);
IMAGE2D_WR(s_target1, r16f, 2);
float ScreenSpaceToViewSpaceDepth( float screenDepth )
{
float depthLinearizeMul = u_depthUnpackConsts.x;
float depthLinearizeAdd = u_depthUnpackConsts.y;
// Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar"
// Set your depthLinearizeMul and depthLinearizeAdd to:
// depthLinearizeMul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear );
// depthLinearizeAdd = cameraClipFar / ( cameraClipFar - cameraClipNear );
return depthLinearizeMul / ( depthLinearizeAdd - screenDepth );
}
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy);
uvec2 dim = imageSize(s_target0).xy;
if (all(lessThan(dtID.xy, dim) ) )
{
ivec2 baseCoord = ivec2(dtID.xy) * 2;
#if BGFX_SHADER_LANGUAGE_GLSL
float a = texelFetch(s_depthSource, baseCoord + ivec2( 0, 1 ), 0).x;
float d = texelFetch(s_depthSource, baseCoord + ivec2( 1, 0 ), 0).x;
#else
float a = texelFetch(s_depthSource, baseCoord + ivec2( 0, 0 ), 0).x;
float d = texelFetch(s_depthSource, baseCoord + ivec2( 1, 1 ), 0).x;
#endif
imageStore(s_target0, ivec2(dtID.xy), ScreenSpaceToViewSpaceDepth( a ).xxxx);
imageStore(s_target1, ivec2(dtID.xy), ScreenSpaceToViewSpaceDepth( d ).xxxx);
}
}

View File

@ -0,0 +1,82 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
IMAGE2D_ARRAY_WR(s_target, rg8, 0);
SAMPLER2DARRAY(s_blurInput, 1);
// unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!
vec4 UnpackEdges( float _packedVal )
{
uint packedVal = uint(_packedVal * 255.5);
vec4 edgesLRTB;
edgesLRTB.x = float((packedVal >> 6) & 0x03) / 3.0; // there's really no need for mask (as it's an 8 bit input) but I'll leave it in so it doesn't cause any trouble in the future
edgesLRTB.y = float((packedVal >> 4) & 0x03) / 3.0;
edgesLRTB.z = float((packedVal >> 2) & 0x03) / 3.0;
edgesLRTB.w = float((packedVal >> 0) & 0x03) / 3.0;
return saturate( edgesLRTB + u_invSharpness );
}
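// Example: a packed byte of 0xB4 (binary 10 11 01 00) unpacks to edgesLRTB = (0.67, 1.0, 0.33, 0.0) before
// u_invSharpness is added, i.e. the right neighbour would blend fully, the left partially, the top only
// weakly and the bottom not at all.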
// ********************************************************************************************************
// Edge-aware ("smart") blur that avoids bleeding AO across depth edges
void AddSample( float ssaoValue, float edgeValue, inout float sum, inout float sumWeight )
{
float weight = edgeValue;
sum += (weight * ssaoValue);
sumWeight += weight;
}
vec2 SampleBlurred( ivec2 inPos, vec2 coord )
{
float packedEdges = texelFetch(s_blurInput, ivec3(inPos.xy, 0), 0 ).y;
vec4 edgesLRTB = UnpackEdges( packedEdges );
#if BGFX_SHADER_LANGUAGE_GLSL
vec4 valuesUL = textureGather(s_blurInput, vec3(coord - u_halfViewportPixelSize * 0.5 + vec2(0.0,u_halfViewportPixelSize.y), 0.0)).wzyx;
vec4 valuesBR = textureGather(s_blurInput, vec3(coord + u_halfViewportPixelSize * 0.5 + vec2(0.0,-u_halfViewportPixelSize.y), 0.0)).wzyx;
#else
vec4 valuesUL = textureGather(s_blurInput, vec3(coord - u_halfViewportPixelSize * 0.5, 0.0));
vec4 valuesBR = textureGather(s_blurInput, vec3(coord + u_halfViewportPixelSize * 0.5, 0.0));
#endif
float ssaoValue = valuesUL.y;
float ssaoValueL = valuesUL.x;
float ssaoValueT = valuesUL.z;
float ssaoValueR = valuesBR.z;
float ssaoValueB = valuesBR.x;
float sumWeight = 0.5f;
float sum = ssaoValue * sumWeight;
AddSample( ssaoValueL, edgesLRTB.x, sum, sumWeight );
AddSample( ssaoValueR, edgesLRTB.y, sum, sumWeight );
AddSample( ssaoValueT, edgesLRTB.z, sum, sumWeight );
AddSample( ssaoValueB, edgesLRTB.w, sum, sumWeight );
float ssaoAvg = sum / sumWeight;
ssaoValue = ssaoAvg; //min( ssaoValue, ssaoAvg ) * 0.2 + ssaoAvg * 0.8;
return vec2( ssaoValue, packedEdges );
}
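// The centre sample keeps a fixed weight of 0.5 and every neighbour is weighted by its unpacked edge value,
// so on a hard edge (all four edge weights at 0.0) the blur degenerates to the unfiltered centre AO value.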
// edge-sensitive blur
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
if (all(lessThan(dtID.xy, u_rect.zw) ) )
{
vec2 inUV = (dtID.xy+vec2(0.5,0.5)) * u_halfViewportPixelSize;
imageStore(s_target, ivec3(dtID.xy, u_layer), SampleBlurred( ivec2(dtID.xy), inUV ).xyyy);
}
}

View File

@ -0,0 +1,83 @@
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
#include "uniforms.sh"
IMAGE2D_ARRAY_WR(s_target, rg8, 0);
SAMPLER2DARRAY(s_blurInput, 1);
// unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!
vec4 UnpackEdges( float _packedVal )
{
uint packedVal = uint(_packedVal * 255.5);
vec4 edgesLRTB;
edgesLRTB.x = float((packedVal >> 6) & 0x03) / 3.0; // there's really no need for mask (as it's an 8 bit input) but I'll leave it in so it doesn't cause any trouble in the future
edgesLRTB.y = float((packedVal >> 4) & 0x03) / 3.0;
edgesLRTB.z = float((packedVal >> 2) & 0x03) / 3.0;
edgesLRTB.w = float((packedVal >> 0) & 0x03) / 3.0;
return saturate( edgesLRTB + u_invSharpness );
}
// ********************************************************************************************************
// Edge-aware ("smart") blur that avoids bleeding AO across depth edges
void AddSample( float ssaoValue, float edgeValue, inout float sum, inout float sumWeight )
{
float weight = edgeValue;
sum += (weight * ssaoValue);
sumWeight += weight;
}
vec2 SampleBlurredWide(vec3 coord)
{
vec2 vC = texture2DArrayLodOffset(s_blurInput, coord, 0.0, ivec2(0, 0)).xy;
vec2 vL = texture2DArrayLodOffset(s_blurInput, coord, 0.0, ivec2(-2, 0)).xy;
vec2 vT = texture2DArrayLodOffset(s_blurInput, coord, 0.0, ivec2(0, -2)).xy;
vec2 vR = texture2DArrayLodOffset(s_blurInput, coord, 0.0, ivec2(2, 0)).xy;
vec2 vB = texture2DArrayLodOffset(s_blurInput, coord, 0.0, ivec2(0, 2)).xy;
float packedEdges = vC.y;
vec4 edgesLRTB = UnpackEdges(packedEdges);
edgesLRTB.x *= UnpackEdges(vL.y).y;
edgesLRTB.z *= UnpackEdges(vT.y).w;
edgesLRTB.y *= UnpackEdges(vR.y).x;
edgesLRTB.w *= UnpackEdges(vB.y).z;
float ssaoValue = vC.x;
float ssaoValueL = vL.x;
float ssaoValueT = vT.x;
float ssaoValueR = vR.x;
float ssaoValueB = vB.x;
float sumWeight = 0.8f;
float sum = ssaoValue * sumWeight;
AddSample(ssaoValueL, edgesLRTB.x, sum, sumWeight);
AddSample(ssaoValueR, edgesLRTB.y, sum, sumWeight);
AddSample(ssaoValueT, edgesLRTB.z, sum, sumWeight);
AddSample(ssaoValueB, edgesLRTB.w, sum, sumWeight);
float ssaoAvg = sum / sumWeight;
ssaoValue = ssaoAvg; //min( ssaoValue, ssaoAvg ) * 0.2 + ssaoAvg * 0.8;
return vec2(ssaoValue, packedEdges);
}
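// Compared to the narrow blur, this kernel reaches two texels out and also multiplies each edge weight by the
// sampled neighbour's opposite edge (e.g. the left weight by the left sample's own right-edge value), so a
// neighbour only contributes when both sides agree there is no discontinuity between them.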
// edge-sensitive blur (wider kernel)
NUM_THREADS(8, 8, 1)
void main()
{
uvec2 dtID = uvec2(gl_GlobalInvocationID.xy) + uvec2(u_rect.xy);
if (all(lessThan(dtID.xy, u_rect.zw) ) )
{
vec2 inUV = (dtID.xy+vec2(0.5,0.5)) * u_halfViewportPixelSize;
imageStore(s_target, ivec3(dtID.xy,u_layer), SampleBlurredWide(vec3(inUV,0.0)).xyyy);
}
}

View File

@ -0,0 +1,43 @@
$input v_texcoord0
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "../common/common.sh"
SAMPLER2D(s_color, 0);
SAMPLER2D(s_normal, 1);
SAMPLER2D(s_ao, 2);
uniform vec4 u_combineParams[2];
void main()
{
vec2 tc0 = v_texcoord0 * u_combineParams[1].xy + u_combineParams[1].zw;
vec3 albedoColor = vec3(1.0,1.0,1.0);
if (u_combineParams[0].x > 0.0)
{
albedoColor = texture2D(s_color, tc0).rgb;
}
float light = 1.0;
if (u_combineParams[0].x > 0.0)
{
vec3 n = texture2D(s_normal, tc0).xyz;
// Expand out normal
n = n*2.0-1.0;
vec3 l = normalize(vec3(-0.8,0.75,-1.0));
light = max(0.0, dot(n, l)) * 1.2 + 0.3;
}
float ao = 1.0;
if ( u_combineParams[0].y > 0.0)
{
ao = texture2D(s_ao, tc0).x;
}
gl_FragColor = vec4(albedoColor * light * ao, 1.0f);
}

View File

@ -0,0 +1,22 @@
$input v_normal, v_texcoord0
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "../common/common.sh"
SAMPLER2D(s_albedo, 0);
void main()
{
vec3 normalWorldSpace = v_normal;
// Write normal
gl_FragData[0].xyz = normalWorldSpace.xyz; // Normal is already compressed to [0,1] so can fit in gbuffer
gl_FragData[0].w = 0.0;
// Write color
gl_FragData[1] = texture2D(s_albedo, v_texcoord0);
}

View File

@ -0,0 +1,10 @@
#
# Copyright 2011-2018 Branimir Karadzic. All rights reserved.
# License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
#
BGFX_DIR=../..
RUNTIME_DIR=$(BGFX_DIR)/examples/runtime
BUILD_DIR=../../.build
include $(BGFX_DIR)/scripts/shader.mk

View File

@ -0,0 +1,42 @@
uniform vec4 u_params[19];
uniform vec4 u_rect;
#define u_viewportPixelSize u_params[0].xy
#define u_halfViewportPixelSize u_params[0].zw
#define u_depthUnpackConsts u_params[1].xy
#define u_ndcToViewMul u_params[2].xy
#define u_ndcToViewAdd u_params[2].zw
#define u_perPassFullResCoordOffset u_params[3].xy
#define u_perPassFullResUVOffset u_params[3].zw
#define u_viewport2xPixelSize u_params[4].xy
#define u_viewport2xPixelSize_x_025 u_params[4].zw
#define u_effectRadius u_params[5].x
#define u_effectShadowStrength u_params[5].y
#define u_effectShadowPow u_params[5].z
#define u_effectShadowClamp u_params[5].w
#define u_effectFadeOutMul u_params[6].x
#define u_effectFadeOutAdd u_params[6].y
#define u_effectHorizonAngleThreshold u_params[6].z
#define u_effectSamplingRadiusNearLimitRec u_params[6].w
#define u_depthPrecisionOffsetMod u_params[7].x
#define u_negRecEffectRadius u_params[7].y
#define u_loadCounterAvgDiv u_params[7].z
#define u_adaptiveSampleCountLimit u_params[7].w
#define u_invSharpness u_params[8].x
#define u_passIndex u_params[8].y
#define u_quarterResPixelSize u_params[8].zw
#define u_patternRotScaleMatrices(i) u_params[9+(i)]
#define u_normalsUnpackMul u_params[14].x
#define u_normalsUnpackAdd u_params[14].y
#define u_detailAOStrength u_params[14].z
#define u_layer u_params[14].w
#define u_normalsWorldToViewspaceMatrix0 u_params[15]
#define u_normalsWorldToViewspaceMatrix1 u_params[16]
#define u_normalsWorldToViewspaceMatrix2 u_params[17]
#define u_normalsWorldToViewspaceMatrix3 u_params[18]
#define SSAO_MAX_TAPS 32
#define SSAO_ADAPTIVE_TAP_BASE_COUNT 5
#define SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT (SSAO_MAX_TAPS-SSAO_ADAPTIVE_TAP_BASE_COUNT)
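// With SSAO_MAX_TAPS at 32 and a base count of 5, up to 27 extra taps are left for the adaptive quality path.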
#define SSAO_DEPTH_MIP_LEVELS 4
#define SSAO_ENABLE_NORMAL_WORLD_TO_VIEW_CONVERSION 1

View File

@ -0,0 +1,7 @@
vec4 a_position : POSITION;
vec2 a_texcoord0 : TEXCOORD0;
vec3 a_normal : NORMAL;
vec2 v_texcoord0 : TEXCOORD0;
vec3 v_normal : NORMAL = vec3(0.0, 0.0, 1.0);

View File

@ -0,0 +1,16 @@
$input a_position, a_texcoord0
$output v_texcoord0
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "../common/common.sh"
void main()
{
gl_Position = mul(u_modelViewProj, vec4(a_position.xyz, 1.0) );
v_texcoord0 = a_texcoord0;
}

View File

@ -0,0 +1,27 @@
$input a_position, a_normal, a_texcoord0
$output v_normal, v_texcoord0
/*
* Copyright 2018 Attila Kocsis. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "../common/common.sh"
void main()
{
// Calculate vertex position
vec3 pos = a_position.xyz;
gl_Position = mul(u_modelViewProj, vec4(pos, 1.0) );
// Calculate normal. Note that compressed normal is stored in the vertices
vec3 normalObjectSpace = a_normal.xyz*2.0 - 1.0; // Normal is stored in [0,1], remap to [-1,1].
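// e.g. a stored vertex normal of (0.5, 0.5, 1.0) decodes to the object-space normal (0.0, 0.0, 1.0) here.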
// Transform normal into world space.
vec3 normalWorldSpace = mul(u_model[0], vec4(normalObjectSpace, 0.0) ).xyz;
// Normalize to remove (uniform) scaling, then recompress into [0,1] for storage.
v_normal.xyz = normalize(normalWorldSpace)*0.5+0.5;
v_texcoord0 = a_texcoord0 * 16.0;
}