diff --git a/3rdparty/meshoptimizer/src/meshoptimizer.h b/3rdparty/meshoptimizer/src/meshoptimizer.h index f3dcac277..71088c462 100644 --- a/3rdparty/meshoptimizer/src/meshoptimizer.h +++ b/3rdparty/meshoptimizer/src/meshoptimizer.h @@ -304,13 +304,22 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t cou * Input data must contain 4 floats for every quaternion (count*4 total). * * meshopt_encodeFilterExp encodes arbitrary (finite) floating-point data with 8-bit exponent and K-bit integer mantissa (1 <= K <= 24). - * Mantissa is shared between all components of a given vector as defined by stride; stride must be divisible by 4. + * Exponent can be shared between all components of a given vector as defined by stride or all values of a given component; stride must be divisible by 4. * Input data must contain stride/4 floats for every vector (count*stride/4 total). - * When individual (scalar) encoding is desired, simply pass stride=4 and adjust count accordingly. */ +enum meshopt_EncodeExpMode +{ + /* When encoding exponents, use separate values for each component (maximum quality) */ + meshopt_EncodeExpSeparate, + /* When encoding exponents, use shared value for all components of each vector (better compression) */ + meshopt_EncodeExpSharedVector, + /* When encoding exponents, use shared value for each component of all vectors (best compression) */ + meshopt_EncodeExpSharedComponent, +}; + MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data); MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterQuat(void* destination, size_t count, size_t stride, int bits, const float* data); -MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data, enum meshopt_EncodeExpMode mode); /** * Simplification options diff --git a/3rdparty/meshoptimizer/src/vertexfilter.cpp b/3rdparty/meshoptimizer/src/vertexfilter.cpp index 14a73b1dd..5c7589cc0 100644 --- a/3rdparty/meshoptimizer/src/vertexfilter.cpp +++ b/3rdparty/meshoptimizer/src/vertexfilter.cpp @@ -63,6 +63,10 @@ #define wasmx_unziphi_v32x4(a, b) wasm_v32x4_shuffle(a, b, 1, 3, 5, 7) #endif +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + namespace meshopt { @@ -185,9 +189,7 @@ inline uint64_t rotateleft64(uint64_t v, int x) { #if defined(_MSC_VER) && !defined(__clang__) return _rotl64(v, x); -// Apple's Clang 8 is actually vanilla Clang 3.9, there we need to look for -// version 11 instead: https://en.wikipedia.org/wiki/Xcode#Toolchain_versions -#elif defined(__clang__) && ((!defined(__apple_build_version__) && __clang_major__ >= 8) || __clang_major__ >= 11) +#elif defined(__clang__) && __has_builtin(__builtin_rotateleft64) return __builtin_rotateleft64(v, x); #else return (v << (x & 63)) | (v >> ((64 - x) & 63)); @@ -791,6 +793,33 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count) } #endif +// optimized variant of frexp +inline int optlog2(float v) +{ + union + { + float f; + unsigned int ui; + } u; + + u.f = v; + // +1 accounts for implicit 1. in mantissa; denormalized numbers will end up clamped to min_exp by calling code + return u.ui == 0 ? 0 : int((u.ui >> 23) & 0xff) - 127 + 1; +} + +// optimized variant of ldexp +inline float optexp2(int e) +{ + union + { + float f; + unsigned int ui; + } u; + + u.ui = unsigned(e + 127) << 23; + return u.f; +} + } // namespace meshopt void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride) @@ -918,39 +947,78 @@ void meshopt_encodeFilterQuat(void* destination_, size_t count, size_t stride, i } } -void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, int bits, const float* data) +void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, int bits, const float* data, enum meshopt_EncodeExpMode mode) { - assert(stride > 0 && stride % 4 == 0); + using namespace meshopt; + + assert(stride > 0 && stride % 4 == 0 && stride <= 256); assert(bits >= 1 && bits <= 24); unsigned int* destination = static_cast(destination_); size_t stride_float = stride / sizeof(float); + int component_exp[64]; + assert(stride_float <= sizeof(component_exp) / sizeof(int)); + + const int min_exp = -100; + + if (mode == meshopt_EncodeExpSharedComponent) + { + for (size_t j = 0; j < stride_float; ++j) + component_exp[j] = min_exp; + + for (size_t i = 0; i < count; ++i) + { + const float* v = &data[i * stride_float]; + + // use maximum exponent to encode values; this guarantees that mantissa is [-1, 1] + for (size_t j = 0; j < stride_float; ++j) + { + int e = optlog2(v[j]); + + component_exp[j] = (component_exp[j] < e) ? e : component_exp[j]; + } + } + } + for (size_t i = 0; i < count; ++i) { const float* v = &data[i * stride_float]; unsigned int* d = &destination[i * stride_float]; - // use maximum exponent to encode values; this guarantees that mantissa is [-1, 1] - int exp = -100; + int vector_exp = min_exp; - for (size_t j = 0; j < stride_float; ++j) + if (mode == meshopt_EncodeExpSharedVector) { - int e; - frexp(v[j], &e); + // use maximum exponent to encode values; this guarantees that mantissa is [-1, 1] + for (size_t j = 0; j < stride_float; ++j) + { + int e = optlog2(v[j]); - exp = (exp < e) ? e : exp; + vector_exp = (vector_exp < e) ? e : vector_exp; + } + } + else if (mode == meshopt_EncodeExpSeparate) + { + for (size_t j = 0; j < stride_float; ++j) + { + int e = optlog2(v[j]); + + component_exp[j] = (min_exp < e) ? e : min_exp; + } } - // note that we additionally scale the mantissa to make it a K-bit signed integer (K-1 bits for magnitude) - exp -= (bits - 1); - - // compute renormalized rounded mantissa for each component - int mmask = (1 << 24) - 1; - for (size_t j = 0; j < stride_float; ++j) { - int m = int(ldexp(v[j], -exp) + (v[j] >= 0 ? 0.5f : -0.5f)); + int exp = (mode == meshopt_EncodeExpSharedVector) ? vector_exp : component_exp[j]; + + // note that we additionally scale the mantissa to make it a K-bit signed integer (K-1 bits for magnitude) + exp -= (bits - 1); + + // compute renormalized rounded mantissa for each component + int mmask = (1 << 24) - 1; + + int m = int(v[j] * optexp2(-exp) + (v[j] >= 0 ? 0.5f : -0.5f)); d[j] = (m & mmask) | (unsigned(exp) << 24); }