Updated meshoptimizer.
This commit is contained in:
parent
f3a26ff266
commit
e829a18299
17
3rdparty/meshoptimizer/Makefile
vendored
17
3rdparty/meshoptimizer/Makefile
vendored
@ -24,6 +24,10 @@ CFLAGS=-g -Wall -Wextra -Werror -std=c89
|
||||
CXXFLAGS=-g -Wall -Wextra -Wshadow -Wno-missing-field-initializers -Werror -std=c++98
|
||||
LDFLAGS=
|
||||
|
||||
# Source files listed as prerequisites of the build/decoder_*.wasm rules.
WASM_SOURCES=src/vertexcodec.cpp src/indexcodec.cpp
|
||||
# Symbol list substituted into -s EXPORTED_FUNCTIONS through WASM_FLAGS.
WASM_EXPORTS=["_meshopt_decodeVertexBuffer","_meshopt_decodeIndexBuffer","_sbrk","__start"]
|
||||
# emcc flags used by both the build/decoder_base.wasm and build/decoder_simd.wasm recipes.
WASM_FLAGS=-O3 -DNDEBUG -s EXPORTED_FUNCTIONS='$(WASM_EXPORTS)' -s ALLOW_MEMORY_GROWTH=1 -s TOTAL_STACK=24576 -s TOTAL_MEMORY=65536
|
||||
|
||||
ifeq ($(config),iphone)
|
||||
IPHONESDK=/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk
|
||||
CFLAGS+=-arch armv7 -arch arm64 -isysroot $(IPHONESDK)
|
||||
@ -70,10 +74,17 @@ format:
|
||||
# Link gltfpack: pass every prerequisite ($^) and $(LDFLAGS) to $(CXX), writing the target ($@).
gltfpack: $(GLTFPACK_OBJECTS) $(LIBRARY)
|
||||
$(CXX) $^ $(LDFLAGS) -o $@
|
||||
|
||||
# NOTE(review): two target lines (js/meshopt_decoder.js and build/decoder_base.wasm) and two
# emcc invocations appear back to back in this span; this looks like removed and added lines
# of the diff rendered together - confirm against the real Makefile before relying on it.
# The build/decoder_base.wasm target depends on $(WASM_SOURCES); the last recipe line compiles
# all prerequisites ($^) with $(WASM_FLAGS) into the target ($@) after build/ has been created.
js/meshopt_decoder.js: src/vertexcodec.cpp src/indexcodec.cpp
|
||||
build/decoder_base.wasm: $(WASM_SOURCES)
|
||||
@mkdir -p build
|
||||
emcc $(filter %.cpp,$^) -O3 -DNDEBUG -s EXPORTED_FUNCTIONS='["_meshopt_decodeVertexBuffer", "_meshopt_decodeIndexBuffer", "_sbrk"]' -s ALLOW_MEMORY_GROWTH=1 -s TOTAL_STACK=24576 -s TOTAL_MEMORY=65536 -o build/meshopt_decoder.wasm
|
||||
sed -i "s#\(var wasm = \)\".*\";#\\1\"$$(cat build/meshopt_decoder.wasm | base64 -w 0)\";#" $@
|
||||
emcc $^ $(WASM_FLAGS) -o $@
|
||||
|
||||
# Build the SIMD decoder module: same prerequisites and $(WASM_FLAGS) as
# build/decoder_base.wasm, with -munimplemented-simd128 and -mbulk-memory appended.
build/decoder_simd.wasm: $(WASM_SOURCES)
|
||||
@mkdir -p build
|
||||
emcc $^ $(WASM_FLAGS) -o $@ -munimplemented-simd128 -mbulk-memory
|
||||
|
||||
# Embed both .wasm modules into the JS decoder ($@), editing the file in place with sed -i.
# Each sed call replaces the quoted string in the `var wasm_base = "...";` /
# `var wasm_simd = "...";` assignment with the module's bytes as hexdump formats them:
# two uppercase hex digits per byte ("%02X"), with -v so repeated data is not collapsed.
js/meshopt_decoder.js: build/decoder_base.wasm build/decoder_simd.wasm
|
||||
sed -i "s#\(var wasm_base = \)\".*\";#\\1\"$$(cat build/decoder_base.wasm | hexdump -v -e '1/1 "%02X"')\";#" $@
|
||||
sed -i "s#\(var wasm_simd = \)\".*\";#\\1\"$$(cat build/decoder_simd.wasm | hexdump -v -e '1/1 "%02X"')\";#" $@
|
||||
|
||||
# Link $(EXECUTABLE): pass every prerequisite ($^) and $(LDFLAGS) to $(CXX), writing the target ($@).
$(EXECUTABLE): $(DEMO_OBJECTS) $(LIBRARY)
|
||||
$(CXX) $^ $(LDFLAGS) -o $@
|
||||
|
29
3rdparty/meshoptimizer/js/meshopt_decoder.js
vendored
29
3rdparty/meshoptimizer/js/meshopt_decoder.js
vendored
File diff suppressed because one or more lines are too long
81
3rdparty/meshoptimizer/src/vertexcodec.cpp
vendored
81
3rdparty/meshoptimizer/src/vertexcodec.cpp
vendored
@ -61,11 +61,14 @@
|
||||
#endif
|
||||
|
||||
// Byte-shuffle helpers built on wasm_v8x16_shuffle, compiled only when SIMD_WASM is defined.
// The index lists below assume the usual shuffle convention: indices 0..15 pick bytes of the
// first operand and 16..31 pick bytes of the second - confirm against wasm_simd128.h.
// NOTE(review): this span defines both a wasm_* and a wasmx_* family; the unpack macros in the
// two families expand to identical index lists. This looks like removed and added diff lines
// shown together - confirm.
// NOTE(review): the lane arguments (i, j, k, l) are substituted unparenthesized into `4 * i`,
// so pass plain literal lane numbers, never expressions such as `a + 1`.
#ifdef SIMD_WASM
|
||||
// Replicate 32-bit lane i of v (bytes 4*i .. 4*i+3) into all four 32-bit lanes.
#define wasm_v32x4_splat(v, i) wasm_v8x16_shuffle(v, v, 4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3, 4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3, 4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3, 4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3)
|
||||
// Interleave bytes 0..7 of a with bytes 0..7 of b: a0 b0 a1 b1 ... a7 b7.
#define wasm_unpacklo_v8x16(a, b) wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)
|
||||
// Interleave bytes 8..15 of a with bytes 8..15 of b: a8 b8 a9 b9 ... a15 b15.
#define wasm_unpackhi_v8x16(a, b) wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31)
|
||||
// Interleave the four low 16-bit units (byte pairs 0..7) of a and b.
#define wasm_unpacklo_v16x8(a, b) wasm_v8x16_shuffle(a, b, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23)
|
||||
// Interleave the four high 16-bit units (byte pairs 8..15) of a and b.
#define wasm_unpackhi_v16x8(a, b) wasm_v8x16_shuffle(a, b, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31)
|
||||
// Build a vector whose 32-bit lanes 0..3 are lanes i, j, k, l of v respectively.
#define wasmx_shuffle_v32x4(v, i, j, k, l) wasm_v8x16_shuffle(v, v, 4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3, 4 * j, 4 * j + 1, 4 * j + 2, 4 * j + 3, 4 * k, 4 * k + 1, 4 * k + 2, 4 * k + 3, 4 * l, 4 * l + 1, 4 * l + 2, 4 * l + 3)
|
||||
// Replicate 32-bit lane i of v into all four lanes (wasmx_shuffle_v32x4 with i repeated).
#define wasmx_splat_v32x4(v, i) wasmx_shuffle_v32x4(v, i, i, i, i)
|
||||
// Interleave bytes 0..7 of a with bytes 0..7 of b: a0 b0 a1 b1 ... a7 b7.
#define wasmx_unpacklo_v8x16(a, b) wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)
|
||||
// Interleave bytes 8..15 of a with bytes 8..15 of b: a8 b8 a9 b9 ... a15 b15.
#define wasmx_unpackhi_v8x16(a, b) wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31)
|
||||
// Interleave the four low 16-bit units (byte pairs 0..7) of a and b.
#define wasmx_unpacklo_v16x8(a, b) wasm_v8x16_shuffle(a, b, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23)
|
||||
// Interleave the four high 16-bit units (byte pairs 8..15) of a and b.
#define wasmx_unpackhi_v16x8(a, b) wasm_v8x16_shuffle(a, b, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31)
|
||||
// Concatenate the low 8 bytes of a (bytes 0..7) with the low 8 bytes of b.
#define wasmx_unpacklo_v64x2(a, b) wasm_v8x16_shuffle(a, b, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23)
|
||||
// Concatenate the high 8 bytes of a (bytes 8..15) with the high 8 bytes of b.
#define wasmx_unpackhi_v64x2(a, b) wasm_v8x16_shuffle(a, b, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31)
|
||||
#endif
|
||||
|
||||
namespace meshopt
|
||||
@ -414,6 +417,9 @@ static const unsigned char* decodeVertexBlock(const unsigned char* data, const u
|
||||
static unsigned char kDecodeBytesGroupShuffle[256][8];
|
||||
static unsigned char kDecodeBytesGroupCount[256];
|
||||
|
||||
#ifdef EMSCRIPTEN
|
||||
__attribute__((cold)) // this saves 500 bytes in the output binary - we don't need to vectorize this loop!
|
||||
#endif
|
||||
static bool decodeBytesGroupBuildTables()
|
||||
{
|
||||
for (int mask = 0; mask < 256; ++mask)
|
||||
@ -706,24 +712,23 @@ static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1)
|
||||
|
||||
v128_t sm1r = wasm_i8x16_add(sm1, sm1off);
|
||||
|
||||
return wasm_v8x16_shuffle(sm0, sm1r, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
|
||||
return wasmx_unpacklo_v64x2(sm0, sm1r);
|
||||
}
|
||||
|
||||
// Condenses the 16-byte vector `mask` into two bytes, written through mask0 and mask1.
// Each byte of the vector is ANDed with a constant holding a single distinct bit, and the
// results are OR-folded together, so bit i of mask0 comes from byte i (0..7) and bit i of
// mask1 comes from byte 8 + i.
// Assumes every byte of `mask` is 0x00 or 0xFF (the callers visible here pass the result of
// wasm_i8x16_eq) - TODO confirm for any other caller.
// NOTE(review): this span holds two separate computations of mask0/mask1 (the m0_*/m1_* fold
// and the mask_* fold); the later pair of assignments overwrites the earlier one. This looks
// like removed and added diff lines shown together - confirm against the real file.
static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1)
|
||||
{
|
||||
// Bytes of this constant, lowest first, are 0x01, 0x02, 0x04, ..., 0x80: one bit per byte.
uint64_t mbits = 0x8040201008040201ull;
|
||||
// Reorder the 32-bit lanes to 0, 2, 1, 3: the first 64-bit half then holds source bytes
// 0..3 and 8..11, the second half holds source bytes 4..7 and 12..15.
v128_t mask_0 = wasmx_shuffle_v32x4(mask, 0, 2, 1, 3);
|
||||
|
||||
// Keep one bit per byte of each 64-bit half of the original vector.
uint64_t m0_8 = wasm_i64x2_extract_lane(mask, 0) & mbits;
|
||||
uint64_t m1_8 = wasm_i64x2_extract_lane(mask, 1) & mbits;
|
||||
// TODO: when Chrome supports v128.const we can try doing a vectorized AND?
|
||||
// Source bytes 0..3 and 8..11 keep bits 0x01..0x08 (per group of four bytes).
uint64_t mask_1a = wasm_i64x2_extract_lane(mask_0, 0) & 0x0804020108040201ull;
|
||||
// Source bytes 4..7 and 12..15 keep bits 0x10..0x80 (per group of four bytes).
uint64_t mask_1b = wasm_i64x2_extract_lane(mask_0, 1) & 0x8040201080402010ull;
|
||||
|
||||
// Fold 64 -> 32 bits (the assignment to uint32_t truncates).
uint32_t m0_4 = m0_8 | (m0_8 >> 32);
|
||||
uint32_t m1_4 = m1_8 | (m1_8 >> 32);
|
||||
// Low 32 bits now carry one bit per source byte 0..7, high 32 bits per source byte 8..15.
uint64_t mask_2 = mask_1a | mask_1b;
|
||||
// OR the four bytes of each 32-bit half down into that half's lowest byte.
uint64_t mask_4 = mask_2 | (mask_2 >> 16);
|
||||
uint64_t mask_8 = mask_4 | (mask_4 >> 8);
|
||||
|
||||
// Fold 32 -> 16 bits (the assignment to uint16_t truncates).
uint16_t m0_2 = m0_4 | (m0_4 >> 16);
|
||||
uint16_t m1_2 = m1_4 | (m1_4 >> 16);
|
||||
|
||||
// Fold 16 -> 8 bits (the assignment to unsigned char truncates).
mask0 = m0_2 | (m0_2 >> 8);
|
||||
mask1 = m1_2 | (m1_2 >> 8);
|
||||
// Final values: lowest byte of each 32-bit half of mask_8.
mask0 = uint8_t(mask_8);
|
||||
mask1 = uint8_t(mask_8 >> 32);
|
||||
}
|
||||
|
||||
static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2)
|
||||
@ -748,19 +753,12 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi
|
||||
v128_t sel2 = wasm_v128_load(data);
|
||||
v128_t rest = wasm_v128_load(data + 4);
|
||||
|
||||
v128_t sel22 = wasm_unpacklo_v8x16(wasm_i16x8_shr(sel2, 4), sel2);
|
||||
v128_t sel2222 = wasm_unpacklo_v8x16(wasm_i16x8_shr(sel22, 2), sel22);
|
||||
v128_t sel22 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel2, 4), sel2);
|
||||
v128_t sel2222 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel22, 2), sel22);
|
||||
v128_t sel = wasm_v128_and(sel2222, wasm_i8x16_splat(3));
|
||||
|
||||
v128_t mask = wasm_i8x16_eq(sel, wasm_i8x16_splat(3));
|
||||
|
||||
if (!wasm_i8x16_any_true(mask))
|
||||
{
|
||||
wasm_v128_store(buffer, sel);
|
||||
|
||||
return data + 4;
|
||||
}
|
||||
|
||||
unsigned char mask0, mask1;
|
||||
wasmMoveMask(mask, mask0, mask1);
|
||||
|
||||
@ -780,18 +778,11 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi
|
||||
v128_t sel4 = wasm_v128_load(data);
|
||||
v128_t rest = wasm_v128_load(data + 8);
|
||||
|
||||
v128_t sel44 = wasm_unpacklo_v8x16(wasm_i16x8_shr(sel4, 4), sel4);
|
||||
v128_t sel44 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel4, 4), sel4);
|
||||
v128_t sel = wasm_v128_and(sel44, wasm_i8x16_splat(15));
|
||||
|
||||
v128_t mask = wasm_i8x16_eq(sel, wasm_i8x16_splat(15));
|
||||
|
||||
if (!wasm_i8x16_any_true(mask))
|
||||
{
|
||||
wasm_v128_store(buffer, sel);
|
||||
|
||||
return data + 8;
|
||||
}
|
||||
|
||||
unsigned char mask0, mask1;
|
||||
wasmMoveMask(mask, mask0, mask1);
|
||||
|
||||
@ -871,15 +862,15 @@ static uint8x16_t unzigzag8(uint8x16_t v)
|
||||
#ifdef SIMD_WASM
|
||||
// Regroups the bytes of four 16-byte vectors in place (all four are in/out references).
// With the unpack macros as defined for SIMD_WASM, on return x0 holds, for i = 0..3, the
// bytes x0[i], x1[i], x2[i], x3[i] of the inputs in sequence; x1 holds the same grouping for
// i = 4..7, x2 for i = 8..11 and x3 for i = 12..15.
// NOTE(review): t0..t3 and x0..x3 are each assigned twice in this span, once via wasm_* and
// once via wasmx_* macros. This looks like removed and added diff lines shown together -
// confirm against the real file.
static void transpose8(v128_t& x0, v128_t& x1, v128_t& x2, v128_t& x3)
|
||||
{
|
||||
// Step 1: interleave bytes of (x0, x1) and of (x2, x3), low and high halves separately.
v128_t t0 = wasm_unpacklo_v8x16(x0, x1);
|
||||
v128_t t1 = wasm_unpackhi_v8x16(x0, x1);
|
||||
v128_t t2 = wasm_unpacklo_v8x16(x2, x3);
|
||||
v128_t t3 = wasm_unpackhi_v8x16(x2, x3);
|
||||
v128_t t0 = wasmx_unpacklo_v8x16(x0, x1);
|
||||
v128_t t1 = wasmx_unpackhi_v8x16(x0, x1);
|
||||
v128_t t2 = wasmx_unpacklo_v8x16(x2, x3);
|
||||
v128_t t3 = wasmx_unpackhi_v8x16(x2, x3);
|
||||
|
||||
// Step 2: interleave the resulting byte pairs as 16-bit units to form groups of four bytes.
x0 = wasm_unpacklo_v16x8(t0, t2);
|
||||
x1 = wasm_unpackhi_v16x8(t0, t2);
|
||||
x2 = wasm_unpacklo_v16x8(t1, t3);
|
||||
x3 = wasm_unpackhi_v16x8(t1, t3);
|
||||
x0 = wasmx_unpacklo_v16x8(t0, t2);
|
||||
x1 = wasmx_unpackhi_v16x8(t0, t2);
|
||||
x2 = wasmx_unpacklo_v16x8(t1, t3);
|
||||
x3 = wasmx_unpackhi_v16x8(t1, t3);
|
||||
}
|
||||
|
||||
static v128_t unzigzag8(v128_t v)
|
||||
@ -977,7 +968,7 @@ static const unsigned char* decodeVertexBlockSimd(const unsigned char* data, con
|
||||
#define TEMP v128_t
|
||||
#define PREP() v128_t pi = wasm_v128_load(last_vertex + k) // TODO: use wasm_v32x4_load_splat to avoid buffer overrun
|
||||
#define LOAD(i) v128_t r##i = wasm_v128_load(buffer + j + i * vertex_count_aligned)
|
||||
#define GRP4(i) t0 = wasm_v32x4_splat(r##i, 0), t1 = wasm_v32x4_splat(r##i, 1), t2 = wasm_v32x4_splat(r##i, 2), t3 = wasm_v32x4_splat(r##i, 3)
|
||||
#define GRP4(i) t0 = wasmx_splat_v32x4(r##i, 0), t1 = wasmx_splat_v32x4(r##i, 1), t2 = wasmx_splat_v32x4(r##i, 2), t3 = wasmx_splat_v32x4(r##i, 3)
|
||||
#define FIXD(i) t##i = pi = wasm_i8x16_add(pi, t##i)
|
||||
#define SAVE(i) *reinterpret_cast<int*>(savep) = wasm_i32x4_extract_lane(t##i, 0), savep += vertex_size
|
||||
#endif
|
||||
@ -1157,12 +1148,6 @@ int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t ve
|
||||
decode = decodeVertexBlock;
|
||||
#endif
|
||||
|
||||
#if defined(SIMD_WASM)
|
||||
// TODO: workaround for https://github.com/emscripten-core/emscripten/issues/9767
|
||||
if (!gDecodeBytesGroupInitialized)
|
||||
gDecodeBytesGroupInitialized = decodeBytesGroupBuildTables();
|
||||
#endif
|
||||
|
||||
#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
|
||||
assert(gDecodeBytesGroupInitialized);
|
||||
(void)gDecodeBytesGroupInitialized;
|
||||
|
Loading…
x
Reference in New Issue
Block a user