mirror of
https://github.com/nothings/stb
synced 2024-12-15 04:22:35 +03:00
various fixes
This commit is contained in:
parent
28630fb253
commit
9e5e19f5a3
@ -341,16 +341,22 @@ enum
|
|||||||
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
STBVOX_TEXLERP4_0_8,
|
STBVOX_TEXLERP_BASE_0, // 0.0
|
||||||
STBVOX_TEXLERP4_1_8,
|
STBVOX_TEXLERP_BASE_2_7, // 2/7
|
||||||
STBVOX_TEXLERP4_2_8,
|
STBVOX_TEXLERP_BASE_5_7, // 4/7
|
||||||
STBVOX_TEXLERP4_3_8,
|
STBVOX_TEXLERP_BASE_1 // 1.0
|
||||||
STBVOX_TEXLERP4_4_8,
|
};
|
||||||
STBVOX_TEXLERP4_5_8,
|
|
||||||
STBVOX_TEXLERP4_6_8,
|
|
||||||
STBVOX_TEXLERP4_7_8,
|
|
||||||
|
|
||||||
STBVOX_TEXLERP4_use_vert=15,
|
enum
|
||||||
|
{
|
||||||
|
STBVOX_TEXLERP3_0_8,
|
||||||
|
STBVOX_TEXLERP3_1_8,
|
||||||
|
STBVOX_TEXLERP3_2_8,
|
||||||
|
STBVOX_TEXLERP3_3_8,
|
||||||
|
STBVOX_TEXLERP3_4_8,
|
||||||
|
STBVOX_TEXLERP3_5_8,
|
||||||
|
STBVOX_TEXLERP3_6_8,
|
||||||
|
STBVOX_TEXLERP3_7_8,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum
|
enum
|
||||||
@ -365,6 +371,7 @@ enum
|
|||||||
STBVOX_FACE__count,
|
STBVOX_FACE__count,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define STBVOX_FACE_NONE 7
|
||||||
|
|
||||||
#define STBVOX_BLOCKTYPE_EMPTY 0
|
#define STBVOX_BLOCKTYPE_EMPTY 0
|
||||||
|
|
||||||
@ -379,6 +386,7 @@ enum
|
|||||||
#define STBVOX_MAKE_MATROT(block, overlay, tex2, color) ((block) + (overlay)*4 + (tex2)*16 + (color)*64)
|
#define STBVOX_MAKE_MATROT(block, overlay, tex2, color) ((block) + (overlay)*4 + (tex2)*16 + (color)*64)
|
||||||
#define STBVOX_MAKE_TEX2_REPLACE(tex2, tex2_replace_face) ((tex2) + ((tex2_replace_face) & 3)*64)
|
#define STBVOX_MAKE_TEX2_REPLACE(tex2, tex2_replace_face) ((tex2) + ((tex2_replace_face) & 3)*64)
|
||||||
#define STBVOX_MAKE_TEXLERP(ns2, ew2, ud2, vert) ((ew2) + (ns2)*4 + (ud2)*16 + (vert)*64)
|
#define STBVOX_MAKE_TEXLERP(ns2, ew2, ud2, vert) ((ew2) + (ns2)*4 + (ud2)*16 + (vert)*64)
|
||||||
|
#define STBVOX_MAKE_TEXLERP_SIMPLE(baselerp,vert,face) ((vert)*32 + (face)*4 + (baselerp))
|
||||||
#define STBVOX_MAKE_TEXLERP1(vert,e2,n2,w2,s2,u4,d2) STBVOX_MAKE_TEXLERP(s2, w2, d2, vert)
|
#define STBVOX_MAKE_TEXLERP1(vert,e2,n2,w2,s2,u4,d2) STBVOX_MAKE_TEXLERP(s2, w2, d2, vert)
|
||||||
#define STBVOX_MAKE_TEXLERP2(vert,e2,n2,w2,s2,u4,d2) ((u2)*16 + (n2)*4 + (s2))
|
#define STBVOX_MAKE_TEXLERP2(vert,e2,n2,w2,s2,u4,d2) ((u2)*16 + (n2)*4 + (s2))
|
||||||
#define STBVOX_MAKE_FACE_MASK(e,n,w,s,u,d) ((e)+(n)*2+(w)*4+(s)*8+(u)*16+(d)*32)
|
#define STBVOX_MAKE_FACE_MASK(e,n,w,s,u,d) ((e)+(n)*2+(w)*4+(s)*8+(u)*16+(d)*32)
|
||||||
@ -419,6 +427,7 @@ struct stbvox_input_description
|
|||||||
unsigned char *vheight; // STBVOX_MAKE_VHEIGHT -- sw:2, se:2, nw:2, ne:2, doesn't rotate
|
unsigned char *vheight; // STBVOX_MAKE_VHEIGHT -- sw:2, se:2, nw:2, ne:2, doesn't rotate
|
||||||
unsigned char *texlerp; // STBVOX_MAKE_TEXLERP -- vert:2, ud:2, ew:2, ns:2
|
unsigned char *texlerp; // STBVOX_MAKE_TEXLERP -- vert:2, ud:2, ew:2, ns:2
|
||||||
unsigned char *texlerp2; // STBVOX_MAKE_TEXLERP2 (and use STBVOX_MAKE_TEXLERP1 for 'texlerp' -- e:2, n:2, u:3, unused:1
|
unsigned char *texlerp2; // STBVOX_MAKE_TEXLERP2 (and use STBVOX_MAKE_TEXLERP1 for 'texlerp' -- e:2, n:2, u:3, unused:1
|
||||||
|
unsigned char *texlerp_simple; // STBVOX_MAKE_TEXLERP_SIMPLE -- baselerp:2, vert_lerp:3, face_to_use_vert_lerp:3
|
||||||
unsigned short *texlerp_vert3; // e:3,n:3,w:3,s:3,u:3 (down comes from 'texlerp')
|
unsigned short *texlerp_vert3; // e:3,n:3,w:3,s:3,u:3 (down comes from 'texlerp')
|
||||||
unsigned short *texlerp_face3; // e:3,n:3,w:3,s:3,u:2,d:2
|
unsigned short *texlerp_face3; // e:3,n:3,w:3,s:3,u:2,d:2
|
||||||
unsigned char *lighting; // lighting:8
|
unsigned char *lighting; // lighting:8
|
||||||
@ -944,7 +953,7 @@ static char *stbvox_fragment_program =
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
" if (texblend_mode)\n"
|
" if (texblend_mode)\n"
|
||||||
" albedo = tex1.xyz * rlerp(tex2.a, 2.0*tex2.xyz, vec3(1.0,1.0,1.0));\n"
|
" albedo = tex1.xyz * rlerp(tex2.a, vec3(1.0,1.0,1.0), 2.0*tex2.xyz);\n"
|
||||||
" else {\n"
|
" else {\n"
|
||||||
#ifdef STBVOX_CONFIG_PREMULTIPLIED_ALPHA
|
#ifdef STBVOX_CONFIG_PREMULTIPLIED_ALPHA
|
||||||
" albedo = (1.0-tex2.a)*tex1.xyz + tex2.xyz;\n"
|
" albedo = (1.0-tex2.a)*tex1.xyz + tex2.xyz;\n"
|
||||||
@ -1314,10 +1323,10 @@ stbvox_mesh_face stbvox_compute_mesh_face_value(stbvox_mesh_maker *mm, stbvox_ro
|
|||||||
|
|
||||||
static unsigned char stbvox_face_lerp[6] = { 0,2,0,2,4,4 };
|
static unsigned char stbvox_face_lerp[6] = { 0,2,0,2,4,4 };
|
||||||
static unsigned char stbvox_vert3_lerp[6] = { 0,3,6,9,12,12 };
|
static unsigned char stbvox_vert3_lerp[6] = { 0,3,6,9,12,12 };
|
||||||
static unsigned char stbvox_vert_lerp_for_face_lerp[6] = { 0, 4, 7 };
|
static unsigned char stbvox_vert_lerp_for_face_lerp[4] = { 0, 4, 7, 7 };
|
||||||
static unsigned char stbvox_face3_lerp[6] = { 0,3,6,9,12,14 };
|
static unsigned char stbvox_face3_lerp[6] = { 0,3,6,9,12,14 };
|
||||||
static unsigned char stbvox_face3_updown[8] = { 0,2,4,7,0,2,4,7 };
|
static unsigned char stbvox_face3_updown[8] = { 0,2,4,7,0,2,4,7 };
|
||||||
|
static unsigned char stbvox_vert_lerp_for_simple[4] = { 0,2,5,7 };
|
||||||
// vertex offsets for face vertices
|
// vertex offsets for face vertices
|
||||||
static unsigned char stbvox_vertex_vector[6][4][3];
|
static unsigned char stbvox_vertex_vector[6][4][3];
|
||||||
static stbvox_mesh_vertex stbvox_vmesh_delta_normal[6][4];
|
static stbvox_mesh_vertex stbvox_vmesh_delta_normal[6][4];
|
||||||
@ -1376,6 +1385,22 @@ void stbvox_make_mesh_for_face(stbvox_mesh_maker *mm, stbvox_rotate rot, int fac
|
|||||||
if (face >= 4)
|
if (face >= 4)
|
||||||
val = stbvox_face3_updown[val];
|
val = stbvox_face3_updown[val];
|
||||||
p1[0] = p1[1] = p1[2] = p1[3] = stbvox_vertex_encode(0,0,0,0,val);
|
p1[0] = p1[1] = p1[2] = p1[3] = stbvox_vertex_encode(0,0,0,0,val);
|
||||||
|
} else if (mm->input.texlerp_simple) {
|
||||||
|
unsigned char val = mm->input.texlerp_simple[v_off];
|
||||||
|
unsigned char lerp_face = (val >> 2) & 7;
|
||||||
|
if (lerp_face == face) {
|
||||||
|
p1[0] = (mm->input.texlerp_simple[v_off + mm->cube_vertex_offset[face][0]] >> 5) & 7;
|
||||||
|
p1[1] = (mm->input.texlerp_simple[v_off + mm->cube_vertex_offset[face][1]] >> 5) & 7;
|
||||||
|
p1[2] = (mm->input.texlerp_simple[v_off + mm->cube_vertex_offset[face][2]] >> 5) & 7;
|
||||||
|
p1[3] = (mm->input.texlerp_simple[v_off + mm->cube_vertex_offset[face][3]] >> 5) & 7;
|
||||||
|
p1[0] = stbvox_vertex_encode(0,0,0,0,p1[0]);
|
||||||
|
p1[1] = stbvox_vertex_encode(0,0,0,0,p1[1]);
|
||||||
|
p1[2] = stbvox_vertex_encode(0,0,0,0,p1[2]);
|
||||||
|
p1[3] = stbvox_vertex_encode(0,0,0,0,p1[3]);
|
||||||
|
} else {
|
||||||
|
unsigned char base = stbvox_vert_lerp_for_simple[val&3];
|
||||||
|
p1[0] = p1[1] = p1[2] = p1[3] = stbvox_vertex_encode(0,0,0,0,base);
|
||||||
|
}
|
||||||
} else if (mm->input.texlerp) {
|
} else if (mm->input.texlerp) {
|
||||||
unsigned char facelerp = (mm->input.texlerp[v_off] >> stbvox_face_lerp[face]) & 3;
|
unsigned char facelerp = (mm->input.texlerp[v_off] >> stbvox_face_lerp[face]) & 3;
|
||||||
if (facelerp == STBVOX_TEXLERP_use_vert) {
|
if (facelerp == STBVOX_TEXLERP_use_vert) {
|
||||||
@ -1407,6 +1432,7 @@ void stbvox_make_mesh_for_face(stbvox_mesh_maker *mm, stbvox_rotate rot, int fac
|
|||||||
stbvox_get_quad_vertex_pointer(mm, mesh, mv, face_data);
|
stbvox_get_quad_vertex_pointer(mm, mesh, mv, face_data);
|
||||||
|
|
||||||
if (mm->input.lighting) {
|
if (mm->input.lighting) {
|
||||||
|
// @TODO: lighting at block centers, but not gathered, instead constant-per-face
|
||||||
if (mm->input.lighting_at_vertices) {
|
if (mm->input.lighting_at_vertices) {
|
||||||
int i;
|
int i;
|
||||||
for (i=0; i < 4; ++i) {
|
for (i=0; i < 4; ++i) {
|
||||||
@ -2128,7 +2154,7 @@ int stbvox_make_mesh(stbvox_mesh_maker *mm)
|
|||||||
int x,y;
|
int x,y;
|
||||||
stbvox_bring_up_to_date(mm);
|
stbvox_bring_up_to_date(mm);
|
||||||
mm->full = 0;
|
mm->full = 0;
|
||||||
if (mm->cur_x || mm->cur_y || mm->cur_z) {
|
if (mm->cur_x > mm->x0 || mm->cur_y > mm->y0 || mm->cur_z > mm->z0) {
|
||||||
stbvox_make_mesh_for_column(mm, mm->cur_x, mm->cur_y, mm->cur_z);
|
stbvox_make_mesh_for_column(mm, mm->cur_x, mm->cur_y, mm->cur_z);
|
||||||
if (mm->full)
|
if (mm->full)
|
||||||
return 0;
|
return 0;
|
||||||
@ -2139,8 +2165,9 @@ int stbvox_make_mesh(stbvox_mesh_maker *mm)
|
|||||||
return 0;
|
return 0;
|
||||||
++mm->cur_y;
|
++mm->cur_y;
|
||||||
}
|
}
|
||||||
|
++mm->cur_x;
|
||||||
}
|
}
|
||||||
for (x=mm->x0; x < mm->x1; ++x) {
|
for (x=mm->cur_x; x < mm->x1; ++x) {
|
||||||
for (y=mm->y0; y < mm->y1; ++y) {
|
for (y=mm->y0; y < mm->y1; ++y) {
|
||||||
stbvox_make_mesh_for_column(mm, x, y, mm->z0);
|
stbvox_make_mesh_for_column(mm, x, y, mm->z0);
|
||||||
if (mm->full) {
|
if (mm->full) {
|
||||||
@ -2805,8 +2832,6 @@ static stbvox_face_up_normal_012[4][4][4] =
|
|||||||
{ STBVF_sw_u, STBVF_sw_u, STBVF_sw_u, STBVF_u , },
|
{ STBVF_sw_u, STBVF_sw_u, STBVF_sw_u, STBVF_u , },
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
// 013[3][3][1]
|
|
||||||
// 023[3][1][1]
|
|
||||||
|
|
||||||
static stbvox_face_up_normal_013[4][4][4] =
|
static stbvox_face_up_normal_013[4][4][4] =
|
||||||
{
|
{
|
||||||
|
@ -341,10 +341,12 @@ int screen_x,screen_y;
|
|||||||
float carried_dt = 0;
|
float carried_dt = 0;
|
||||||
#define TICKRATE 60
|
#define TICKRATE 60
|
||||||
|
|
||||||
|
float tex2_alpha = 1.0;
|
||||||
|
|
||||||
int raw_level_time;
|
int raw_level_time;
|
||||||
|
|
||||||
float global_timer;
|
float global_timer;
|
||||||
|
int global_hack;
|
||||||
|
|
||||||
int loopmode(float dt, int real, int in_client)
|
int loopmode(float dt, int real, int in_client)
|
||||||
{
|
{
|
||||||
@ -360,6 +362,11 @@ int loopmode(float dt, int real, int in_client)
|
|||||||
|
|
||||||
carried_dt += dt;
|
carried_dt += dt;
|
||||||
while (carried_dt > 1.0/TICKRATE) {
|
while (carried_dt > 1.0/TICKRATE) {
|
||||||
|
if (global_hack) {
|
||||||
|
tex2_alpha += global_hack / 60.0f;
|
||||||
|
if (tex2_alpha < 0) tex2_alpha = 0;
|
||||||
|
if (tex2_alpha > 1) tex2_alpha = 1;
|
||||||
|
}
|
||||||
//update_input();
|
//update_input();
|
||||||
// if the player is dead, stop the sim
|
// if the player is dead, stop the sim
|
||||||
carried_dt -= 1.0/TICKRATE;
|
carried_dt -= 1.0/TICKRATE;
|
||||||
@ -432,6 +439,8 @@ void process_event(SDL_Event *e)
|
|||||||
if (s == SDL_SCANCODE_LCTRL) active_control_set(5);
|
if (s == SDL_SCANCODE_LCTRL) active_control_set(5);
|
||||||
if (s == SDL_SCANCODE_S) active_control_set(6);
|
if (s == SDL_SCANCODE_S) active_control_set(6);
|
||||||
if (s == SDL_SCANCODE_D) active_control_set(7);
|
if (s == SDL_SCANCODE_D) active_control_set(7);
|
||||||
|
if (k == '1') global_hack = !global_hack;
|
||||||
|
if (k == '2') global_hack = -1;
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
if (game_mode == GAME_editor) {
|
if (game_mode == GAME_editor) {
|
||||||
@ -568,7 +577,7 @@ int SDL_main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
SDL_GL_SetSwapInterval(0); // only when profiling
|
SDL_GL_SetSwapInterval(1);
|
||||||
|
|
||||||
render_init();
|
render_init();
|
||||||
mesh_init();
|
mesh_init();
|
||||||
|
@ -32,11 +32,11 @@
|
|||||||
|
|
||||||
extern void ods(char *fmt, ...);
|
extern void ods(char *fmt, ...);
|
||||||
|
|
||||||
#define FANCY_LEAVES // nearly 2x the triangles when enabled (if underground is filled)
|
//#define FANCY_LEAVES // nearly 2x the triangles when enabled (if underground is filled)
|
||||||
#define FAST_CHUNK
|
#define FAST_CHUNK
|
||||||
#define IN_PLACE
|
#define IN_PLACE
|
||||||
|
|
||||||
#define SKIP_TERRAIN 0 // use to avoid building underground stuff
|
#define SKIP_TERRAIN 48 // use to avoid building underground stuff
|
||||||
// allows you to see what perf would be like if underground was efficiently culled,
|
// allows you to see what perf would be like if underground was efficiently culled,
|
||||||
// or if you were making a game without underground
|
// or if you were making a game without underground
|
||||||
|
|
||||||
@ -594,6 +594,7 @@ void make_map_segment_for_superchunk_preconvert(int chunk_x, int chunk_y, int se
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// build 1 mesh covering 2x2 chunks
|
||||||
void build_chunk(int chunk_x, int chunk_y, fast_chunk *fc_table[4][4], raw_mesh *rm)
|
void build_chunk(int chunk_x, int chunk_y, fast_chunk *fc_table[4][4], raw_mesh *rm)
|
||||||
{
|
{
|
||||||
int a,b,z;
|
int a,b,z;
|
||||||
@ -603,6 +604,10 @@ void build_chunk(int chunk_x, int chunk_y, fast_chunk *fc_table[4][4], raw_mesh
|
|||||||
unsigned char vheight[34][34][18];
|
unsigned char vheight[34][34][18];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef STBVOX_CONFIG_DISABLE_TEX2
|
||||||
|
unsigned char tex2_choice[34][34][18];
|
||||||
|
#endif
|
||||||
|
|
||||||
assert((chunk_x & 1) == 0);
|
assert((chunk_x & 1) == 0);
|
||||||
assert((chunk_y & 1) == 0);
|
assert((chunk_y & 1) == 0);
|
||||||
|
|
||||||
@ -618,7 +623,6 @@ void build_chunk(int chunk_x, int chunk_y, fast_chunk *fc_table[4][4], raw_mesh
|
|||||||
map->block_color_face = minecraft_color_for_blocktype;
|
map->block_color_face = minecraft_color_for_blocktype;
|
||||||
map->block_geometry = minecraft_geom_for_blocktype;
|
map->block_geometry = minecraft_geom_for_blocktype;
|
||||||
|
|
||||||
// we're going to build 4 meshes in parallel, each covering 2x2 chunks
|
|
||||||
stbvox_reset_buffers(&rm->mm);
|
stbvox_reset_buffers(&rm->mm);
|
||||||
stbvox_set_buffer(&rm->mm, 0, 0, rm->build_buffer, BUILD_BUFFER_SIZE);
|
stbvox_set_buffer(&rm->mm, 0, 0, rm->build_buffer, BUILD_BUFFER_SIZE);
|
||||||
stbvox_set_buffer(&rm->mm, 0, 1, rm->face_buffer , FACE_BUFFER_SIZE);
|
stbvox_set_buffer(&rm->mm, 0, 1, rm->face_buffer , FACE_BUFFER_SIZE);
|
||||||
@ -636,6 +640,25 @@ void build_chunk(int chunk_x, int chunk_y, fast_chunk *fc_table[4][4], raw_mesh
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef STBVOX_CONFIG_DISABLE_TEX2
|
||||||
|
for (a=0; a < 34; ++a) {
|
||||||
|
for (b=0; b < 34; ++b) {
|
||||||
|
int px = chunk_x*16 + a - 1;
|
||||||
|
int py = chunk_y*16 + b - 1;
|
||||||
|
float dist = (float) sqrt(px*px + py*py);
|
||||||
|
float s1 = (float) sin(dist / 16), s2, s3;
|
||||||
|
dist = (float) sqrt((px-80)*(px-80) + (py-50)*(py-50));
|
||||||
|
s2 = (float) sin(dist / 11);
|
||||||
|
for (z=0; z < 18; ++z) {
|
||||||
|
s3 = (float) sin(z * 3.141592 / 8);
|
||||||
|
|
||||||
|
s3 = s1*s2*s3;
|
||||||
|
tex2_choice[a][b][z] = 63 & (int) stb_linear_remap(s3,-1,1, -20,83);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
for (z=256-16; z >= SKIP_TERRAIN; z -= 16)
|
for (z=256-16; z >= SKIP_TERRAIN; z -= 16)
|
||||||
{
|
{
|
||||||
int z0 = z;
|
int z0 = z;
|
||||||
@ -646,6 +669,9 @@ void build_chunk(int chunk_x, int chunk_y, fast_chunk *fc_table[4][4], raw_mesh
|
|||||||
|
|
||||||
map->blocktype = &rm->sv_blocktype[1][1][1-z]; // specify location of 0,0,0 so that accessing z0..z1 gets right data
|
map->blocktype = &rm->sv_blocktype[1][1][1-z]; // specify location of 0,0,0 so that accessing z0..z1 gets right data
|
||||||
map->lighting = &rm->sv_lighting[1][1][1-z];
|
map->lighting = &rm->sv_lighting[1][1][1-z];
|
||||||
|
#ifndef STBVOX_CONFIG_DISABLE_TEX2
|
||||||
|
map->tex2 = &tex2_choice[1][1][1-z];
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef VHEIGHT_TEST
|
#ifdef VHEIGHT_TEST
|
||||||
// hacky test of vheight
|
// hacky test of vheight
|
||||||
@ -844,3 +870,58 @@ void mesh_init(void)
|
|||||||
remap_in_place(54, 9);
|
remap_in_place(54, 9);
|
||||||
remap_in_place(146, 10);
|
remap_in_place(146, 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Timing stats while optimizing the single-threaded builder
|
||||||
|
|
||||||
|
// 32..-32, 32..-32, SKIP_TERRAIN=0, !FANCY_LEAVES on 'mcrealm' data set
|
||||||
|
|
||||||
|
// 6.27s - reblocked to do 16 z at a time instead of 256 (still using 66x66x258), 4 meshes in parallel
|
||||||
|
// 5.96s - reblocked to use FAST_CHUNK (no intermediate data structure)
|
||||||
|
// 5.45s - unknown change, or previous measurement was wrong
|
||||||
|
|
||||||
|
// 6.12s - use preconverted data, not in-place
|
||||||
|
// 5.91s - use preconverted, in-place
|
||||||
|
// 5.34s - preconvert, in-place, avoid dependency chain (suggested by ryg)
|
||||||
|
// 5.34s - preconvert, in-place, avoid dependency chain, use bit-table instead of byte-table
|
||||||
|
// 5.50s - preconvert, in-place, branchless
|
||||||
|
|
||||||
|
// 6.42s - non-preconvert, avoid dependency chain (not an error)
|
||||||
|
// 5.40s - non-preconvert, w/dependency chain (same as earlier)
|
||||||
|
|
||||||
|
// 5.50s - non-FAST_CHUNK, reblocked outer loop for better cache reuse
|
||||||
|
// 4.73s - FAST_CHUNK non-preconvert, reblocked outer loop
|
||||||
|
// 4.25s - preconvert, in-place, reblocked outer loop
|
||||||
|
// 4.18s - preconvert, in-place, unrolled again
|
||||||
|
// 4.10s - 34x34 1 mesh instead of 66x66 and 4 meshes (will make it easier to do multiple threads)
|
||||||
|
|
||||||
|
// 4.83s - building bitmasks but not using them (2 bits per block, one if empty, one if solid)
|
||||||
|
|
||||||
|
// 5.16s - using empty bitmasks to early out
|
||||||
|
// 5.01s - using solid & empty bitmasks to early out - "foo"
|
||||||
|
// 4.64s - empty bitmask only, test 8 at a time, then test geom
|
||||||
|
// 4.72s - empty bitmask only, 8 at a time, then test bits
|
||||||
|
// 4.46s - split bitmask building into three loops (each byte is separate)
|
||||||
|
// 4.42s - further optimize computing bitmask
|
||||||
|
|
||||||
|
// 4.58s - using solid & empty bitmasks to early out, same as "foo" but faster bitmask building
|
||||||
|
// 4.12s - using solid & empty bitmasks to efficiently test neighbors
|
||||||
|
// 4.04s - using 16-bit fetches (not endian-independent)
|
||||||
|
// - note this is first place that beats previous best '4.10s - 34x34 1 mesh'
|
||||||
|
|
||||||
|
// 4.30s - current time with bitmasks disabled again (note was 4.10s earlier)
|
||||||
|
// 3.95s - bitmasks enabled again, no other changes
|
||||||
|
// 4.00s - current time with bitmasks disabled again, no other changes -- wide variation that is time dependent?
|
||||||
|
// (note that most of the numbers listed here are median of 3 values already)
|
||||||
|
// 3.98s - bitmasks enabled
|
||||||
|
|
||||||
|
// Bitmasks removed from the code as not worth the complexity increase
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Raw data for Q&A:
|
||||||
|
//
|
||||||
|
// 26% parsing & loading minecraft files (4/5ths of which is zlib decode)
|
||||||
|
// 39% building mesh from stb input format
|
||||||
|
// 18% converting from minecraft blocks to stb blocks
|
||||||
|
// 9% reordering from minecraft axis order to stb axis order
|
||||||
|
// 7% uploading vertex buffer to OpenGL
|
||||||
|
@ -364,6 +364,42 @@ int num_meshes_started; // stats
|
|||||||
int request_chunk(int chunk_x, int chunk_y);
|
int request_chunk(int chunk_x, int chunk_y);
|
||||||
void update_meshes_from_render_thread(void);
|
void update_meshes_from_render_thread(void);
|
||||||
|
|
||||||
|
unsigned char tex2_data[64][4];
|
||||||
|
|
||||||
|
void init_tex2_gradient(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i=0; i < 16; ++i) {
|
||||||
|
tex2_data[i+ 0][0] = 64 + 12*i;
|
||||||
|
tex2_data[i+ 0][1] = 32;
|
||||||
|
tex2_data[i+ 0][2] = 64;
|
||||||
|
|
||||||
|
tex2_data[i+16][0] = 255;
|
||||||
|
tex2_data[i+16][1] = 32 + 8*i;
|
||||||
|
tex2_data[i+16][2] = 64;
|
||||||
|
|
||||||
|
tex2_data[i+32][0] = 255;
|
||||||
|
tex2_data[i+32][1] = 160;
|
||||||
|
tex2_data[i+32][2] = 64 + 12*i;
|
||||||
|
|
||||||
|
tex2_data[i+48][0] = 255;
|
||||||
|
tex2_data[i+48][1] = 160 + 6*i;
|
||||||
|
tex2_data[i+48][2] = 255;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_tex2_alpha(float fa)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int a = (int) stb_lerp(fa, 0, 255);
|
||||||
|
if (a < 0) a = 0; else if (a > 255) a = 255;
|
||||||
|
glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
|
||||||
|
for (i=0; i < 64; ++i) {
|
||||||
|
tex2_data[i][3] = a;
|
||||||
|
glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, 1,1,1, GL_RGBA, GL_UNSIGNED_BYTE, tex2_data[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void render_init(void)
|
void render_init(void)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@ -424,64 +460,21 @@ void render_init(void)
|
|||||||
|
|
||||||
glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
|
glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
|
||||||
glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA,
|
glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA,
|
||||||
TEX_SIZE,TEX_SIZE,128,
|
1,1,64,
|
||||||
0,GL_RGBA,GL_UNSIGNED_BYTE,NULL);
|
0,GL_RGBA,GL_UNSIGNED_BYTE,NULL);
|
||||||
|
init_tex2_gradient();
|
||||||
|
set_tex2_alpha(0.0);
|
||||||
|
#if 0
|
||||||
for (i=0; i < 128; ++i) {
|
for (i=0; i < 128; ++i) {
|
||||||
build_overlay_texture(i);
|
//build_overlay_texture(i);
|
||||||
glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]);
|
glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
|
glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
|
||||||
glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||||
glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT);
|
glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Timing stats while optimizing the single-threaded builder
|
|
||||||
|
|
||||||
// 32..-32, 32..-32, !FILL_TERRAIN, !FANCY_LEAVES on 'mcrealm' data set
|
|
||||||
|
|
||||||
// 6.27s - reblocked to do 16 z at a time instead of 256 (still using 66x66x258), 4 meshes in parallel
|
|
||||||
// 5.96s - reblocked to use FAST_CHUNK (no intermediate data structure)
|
|
||||||
// 5.45s - unknown change, or previous measurement was wrong
|
|
||||||
|
|
||||||
// 6.12s - use preconverted data, not in-place
|
|
||||||
// 5.91s - use preconverted, in-place
|
|
||||||
// 5.34s - preconvert, in-place, avoid dependency chain (suggested by ryg)
|
|
||||||
// 5.34s - preconvert, in-place, avoid dependency chain, use bit-table instead of byte-table
|
|
||||||
// 5.50s - preconvert, in-place, branchless
|
|
||||||
|
|
||||||
// 6.42s - non-preconvert, avoid dependency chain (not an error)
|
|
||||||
// 5.40s - non-preconvert, w/dependency chain (same as earlier)
|
|
||||||
|
|
||||||
// 5.50s - non-FAST_CHUNK, reblocked outer loop for better cache reuse
|
|
||||||
// 4.73s - FAST_CHUNK non-preconvert, reblocked outer loop
|
|
||||||
// 4.25s - preconvert, in-place, reblocked outer loop
|
|
||||||
// 4.18s - preconvert, in-place, unrolled again
|
|
||||||
// 4.10s - 34x34 1 mesh instead of 66x66 and 4 meshes (will make it easier to do multiple threads)
|
|
||||||
|
|
||||||
// 4.83s - building bitmasks but not using them (2 bits per block, one if empty, one if solid)
|
|
||||||
|
|
||||||
// 5.16s - using empty bitmasks to early out
|
|
||||||
// 5.01s - using solid & empty bitmasks to early out - "foo"
|
|
||||||
// 4.64s - empty bitmask only, test 8 at a time, then test geom
|
|
||||||
// 4.72s - empty bitmask only, 8 at a time, then test bits
|
|
||||||
// 4.46s - split bitmask building into three loops (each byte is separate)
|
|
||||||
// 4.42s - further optimize computing bitmask
|
|
||||||
|
|
||||||
// 4.58s - using solid & empty bitmasks to early out, same as "foo" but faster bitmask building
|
|
||||||
// 4.12s - using solid & empty bitmasks to efficiently test neighbors
|
|
||||||
// 4.04s - using 16-bit fetches (not endian-independent)
|
|
||||||
// - note this is first place that beats previous best '4.10s - 34x34 1 mesh'
|
|
||||||
|
|
||||||
// 4.30s - current time with bitmasks disabled again (note was 4.10s earlier)
|
|
||||||
// 3.95s - bitmasks enabled again, no other changes
|
|
||||||
// 4.00s - current time with bitmasks disabled again, no other changes -- wide variation that is time dependent?
|
|
||||||
// (note that most of the numbers listed here are median of 3 values already)
|
|
||||||
// 3.98s - bitmasks enabled
|
|
||||||
|
|
||||||
// Bitmasks removed from the code as not worth the complexity increase
|
|
||||||
|
|
||||||
|
|
||||||
void world_init(void)
|
void world_init(void)
|
||||||
{
|
{
|
||||||
int a,b,x,y;
|
int a,b,x,y;
|
||||||
@ -751,6 +744,8 @@ void update_meshes_from_render_thread(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern float tex2_alpha;
|
||||||
|
extern int global_hack;
|
||||||
int num_threads_active;
|
int num_threads_active;
|
||||||
float chunk_server_activity;
|
float chunk_server_activity;
|
||||||
|
|
||||||
@ -790,6 +785,8 @@ void render_caves(float campos[3])
|
|||||||
stbglUniform3fv(stbgl_find_uniform(main_prog, "light_source"), 2, lighting[0]);
|
stbglUniform3fv(stbgl_find_uniform(main_prog, "light_source"), 2, lighting[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (global_hack)
|
||||||
|
set_tex2_alpha(tex2_alpha);
|
||||||
|
|
||||||
num_meshes_uploaded = 0;
|
num_meshes_uploaded = 0;
|
||||||
update_meshes_from_render_thread();
|
update_meshes_from_render_thread();
|
||||||
@ -952,16 +949,3 @@ void render_caves(float campos[3])
|
|||||||
num_threads_active += (mesh_data[i].state == WSTATE_running);
|
num_threads_active += (mesh_data[i].state == WSTATE_running);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Raw data for Q&A:
|
|
||||||
//
|
|
||||||
// 26% parsing & loading minecraft files (4/5ths of which is zlib decode)
|
|
||||||
// 39% building mesh from stb input format
|
|
||||||
// 18% converting from minecraft blocks to stb blocks
|
|
||||||
// 9% reordering from minecraft axis order to stb axis order
|
|
||||||
// 7% uploading vertex buffer to OpenGL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user