stb_image: New Paeth filter

This formulation is equivalent to the original (reference)
implementation but runs _significantly_ faster - this speeds up
the filtering portion of a Paeth-heavy 8192x8192 16-bit/channel
image by a factor of more than 2 on a Zen2 CPU.

I'm investigating doing a more thorough restructuring of this pass,
but this seems like a good first step.
This commit is contained in:
Fabian Giesen 2023-05-02 01:17:05 -07:00 committed by Sean Barrett
parent ed64333410
commit 07268cbf36
2 changed files with 56 additions and 7 deletions

View File

@ -4654,13 +4654,15 @@ static stbi_uc first_row_filter[5] =
static int stbi__paeth(int a, int b, int c)
{
int p = a + b - c;
int pa = abs(p-a);
int pb = abs(p-b);
int pc = abs(p-c);
if (pa <= pb && pa <= pc) return a;
if (pb <= pc) return b;
return c;
// This formulation looks very different from the reference in the PNG spec, but is
// actually equivalent and has favorable data dependencies and admits straightforward
// generation of branch-free code, which helps performance significantly.
int thresh = c*3 - (a + b);
int lo = a < b ? a : b;
int hi = a < b ? b : a;
int t0 = (hi <= thresh) ? lo : c;
int t1 = (thresh <= lo) ? hi : t0;
return t1;
}
static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };

47
tests/test_png_paeth.c Normal file
View File

@ -0,0 +1,47 @@
#include <stdio.h>
#include <stdlib.h>
// Reference Paeth filter as per PNG spec
static int ref_paeth(int a, int b, int c)
{
int p = a + b - c;
int pa = abs(p-a);
int pb = abs(p-b);
int pc = abs(p-c);
if (pa <= pb && pa <= pc) return a;
if (pb <= pc) return b;
return c;
}
// Optimized Paeth filter
static int opt_paeth(int a, int b, int c)
{
int thresh = c*3 - (a + b);
int lo = a < b ? a : b;
int hi = a < b ? b : a;
int t0 = (hi <= thresh) ? lo : c;
int t1 = (thresh <= lo) ? hi : t0;
return t1;
}
int main()
{
// Exhaustively test the functions match for all byte inputs a, b,c in [0,255]
for (int i = 0; i < (1 << 24); ++i) {
int a = i & 0xff;
int b = (i >> 8) & 0xff;
int c = (i >> 16) & 0xff;
int ref = ref_paeth(a, b, c);
int opt = opt_paeth(a, b, c);
if (ref != opt) {
fprintf(stderr, "mismatch at a=%3d b=%3d c=%3d: ref=%3d opt=%3d\n", a, b, c, ref, opt);
return 1;
}
}
printf("all ok!\n");
return 0;
}
// vim:sw=3:sts=3:et