diff --git a/libmemdraw/draw.c b/libmemdraw/draw.c index f565997..a770402 100644 --- a/libmemdraw/draw.c +++ b/libmemdraw/draw.c @@ -10,22 +10,34 @@ static int tablesbuilt; #define RGB2K(r,g,b) ((156763*(r)+307758*(g)+59769*(b))>>19) /* - * for 0 ≤ x ≤ 255*255, (x*0x0101+0x100)>>16 is a perfect approximation. - * for 0 ≤ x < (1<<16), x/255 = ((x+1)*0x0101)>>16 is a perfect approximation. - * the last one is perfect for all up to 1<<16, avoids a multiply, but requires a rathole. + * For 16-bit values, x / 255 == (t = x+1, (t+(t>>8)) >> 8). + * We add another 127 to round to the nearest value rather + * than truncate. + * + * CALCxy does x bytewise calculations on y input images (x=1,4; y=1,2). + * CALC2x does two parallel 16-bit calculations on y input images (y=1,2). */ -/* #define DIV255(x) (((x)*257+256)>>16) */ -#define DIV255(x) ((((x)+1)*257)>>16) -/* #define DIV255(x) (tmp=(x)+1, (tmp+(tmp>>8))>>8) */ +#define CALC11(a, v, tmp) \ + (tmp=(a)*(v)+128, (tmp+(tmp>>8))>>8) -#define MUL(x, y, t) (t = (x)*(y)+128, (t+(t>>8))>>8) -#define MASK13 0xFF00FF00 -#define MASK02 0x00FF00FF -#define MUL13(a, x, t) (t = (a)*(((x)&MASK13)>>8)+128, ((t+((t>>8)&MASK02))>>8)&MASK02) -#define MUL02(a, x, t) (t = (a)*(((x)&MASK02)>>0)+128, ((t+((t>>8)&MASK02))>>8)&MASK02) -#define MUL0123(a, x, s, t) ((MUL13(a, x, s)<<8)|MUL02(a, x, t)) +#define CALC12(a1, v1, a2, v2, tmp) \ + (tmp=(a1)*(v1)+(a2)*(v2)+128, (tmp+(tmp>>8))>>8) -#define MUL2(u, v, x, y) (t = (u)*(v)+(x)*(y)+256, (t+(t>>8))>>8) +#define MASK 0xFF00FF + +#define CALC21(a, vvuu, tmp) \ + (tmp=(a)*(vvuu)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK) + +#define CALC41(a, rgba, tmp1, tmp2) \ + (CALC21(a, rgba & MASK, tmp1) | \ + (CALC21(a, (rgba>>8)&MASK, tmp2)<<8)) + +#define CALC22(a1, vvuu1, a2, vvuu2, tmp) \ + (tmp=(a1)*(vvuu1)+(a2)*(vvuu2)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK) + +#define CALC42(a1, rgba1, a2, rgba2, tmp1, tmp2) \ + (CALC22(a1, rgba1 & MASK, a2, rgba2 & MASK, tmp1) | \ + (CALC22(a1, (rgba1>>8) & MASK, a2, (rgba2>>8) & MASK, tmp2)<<8)) static void mktables(void); typedef int Subdraw(Memdrawparam*); @@ -307,6 +319,9 @@ drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, * Conversion tables. */ static uchar replbit[1+8][256]; /* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */ +static uchar conv18[256][8]; /* conv18[x][y] is the yth pixel in the depth-1 pixel x */ +static uchar conv28[256][4]; /* ... */ +static uchar conv48[256][2]; /* * bitmap of how to replicate n bits to fill 8, for 1 ≤ n ≤ 8. @@ -340,7 +355,7 @@ static int replmul[1+8] = { static void mktables(void) { - int i, j, small; + int i, j, mask, sh, small; if(tablesbuilt) return; @@ -357,6 +372,17 @@ mktables(void) } } + /* bit unpacking up to 8 bits, only powers of 2 */ + for(i=0; i<256; i++){ + for(j=0, sh=7, mask=1; j<8; j++, sh--) + conv18[i][j] = replbit[1][(i>>sh)&mask]; + + for(j=0, sh=6, mask=3; j<4; j++, sh-=2) + conv28[i][j] = replbit[2][(i>>sh)&mask]; + + for(j=0, sh=4, mask=15; j<2; j++, sh-=4) + conv48[i][j] = replbit[4][(i>>sh)&mask]; + } } static uchar ones = 0xff; @@ -770,41 +796,85 @@ alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op) return bdst; } +/* + * Do the channels in the buffers match enough + * that we can do word-at-a-time operations + * on the pixels? + */ +static int +chanmatch(Buffer *bdst, Buffer *bsrc) +{ + uchar *drgb, *srgb; + + /* + * first, r, g, b must be in the same place + * in the rgba word. + */ + drgb = (uchar*)bdst->rgba; + srgb = (uchar*)bsrc->rgba; + if(bdst->red - drgb != bsrc->red - srgb + || bdst->blu - drgb != bsrc->blu - srgb + || bdst->grn - drgb != bsrc->grn - srgb) + return 0; + + /* + * that implies alpha is in the same place, + * if it is there at all (it might be == &ones). + * if the destination is &ones, we can scribble + * over the rgba slot just fine. + */ + if(bdst->alpha == &ones) + return 1; + + /* + * if the destination is not ones but the src is, + * then the simultaneous calculation will use + * bogus bytes from the src's rgba. no good. + */ + if(bsrc->alpha == &ones) + return 0; + + /* + * otherwise, alphas are in the same place. + */ + return 1; +} + static Buffer alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op) { Buffer obdst; int fd, sadelta; int i, sa, ma, q; - ulong s, t; + ulong t, t1; obdst = bdst; sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta; - q = bsrc.delta == 4 && bdst.delta == 4; + q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc); for(i=0; isdval; /* make little endian */ @@ -2457,7 +2527,6 @@ _memfillcolor(Memimage *i, ulong val) { ulong bits; int d, y; - uchar p[4]; if(val == DNofill) return; @@ -2471,11 +2540,6 @@ _memfillcolor(Memimage *i, ulong val) default: /* 1, 2, 4, 8, 16, 32 */ for(d=i->depth; d<32; d*=2) bits = (bits << d) | bits; - p[0] = bits; /* make little endian */ - p[1] = bits>>8; - p[2] = bits>>16; - p[3] = bits>>24; - bits = *(ulong*)p; memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r)); break; }