Simplify the blitting code slightly, and fix fenceposts.
This commit is contained in:
parent
4ae80dbd09
commit
6db8861212
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: tga.c,v 1.42 2002/07/04 14:37:11 junyoung Exp $ */
|
||||
/* $NetBSD: tga.c,v 1.43 2002/09/16 16:33:13 mycroft Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1995, 1996 Carnegie-Mellon University.
|
||||
|
@ -28,7 +28,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: tga.c,v 1.42 2002/07/04 14:37:11 junyoung Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: tga.c,v 1.43 2002/09/16 16:33:13 mycroft Exp $");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
|
@ -1072,32 +1072,6 @@ tga_rop_vtov(dst, dx, dy, w, h, rop, src, sx, sy)
|
|||
return -1;
|
||||
}
|
||||
|
||||
wb = w * (dst->ri_depth / 8);
|
||||
if (sy >= dy) {
|
||||
ystart = 0;
|
||||
yend = h;
|
||||
ydir = 1;
|
||||
} else {
|
||||
ystart = h;
|
||||
yend = 0;
|
||||
ydir = -1;
|
||||
}
|
||||
if (sx >= dx) { /* moving to the left */
|
||||
xstart = 0;
|
||||
xend = w * (dst->ri_depth / 8) - 4;
|
||||
xdir = 1;
|
||||
} else { /* moving to the right */
|
||||
xstart = wb - ( wb >= 4*64 ? 4*64 : wb >= 64 ? 64 : 4 );
|
||||
xend = 0;
|
||||
xdir = -1;
|
||||
}
|
||||
#define XINC4 4
|
||||
#define XINC64 64
|
||||
#define XINC256 (64*4)
|
||||
yinc = ydir * dst->ri_stride;
|
||||
ystart *= dst->ri_stride;
|
||||
yend *= dst->ri_stride;
|
||||
|
||||
srcb = sy * src->ri_stride + sx * (src->ri_depth/8);
|
||||
dstb = dy * dst->ri_stride + dx * (dst->ri_depth/8);
|
||||
tga_srcb = offset + (sy + src->ri_yorigin) * src->ri_stride +
|
||||
|
@ -1105,6 +1079,28 @@ tga_rop_vtov(dst, dx, dy, w, h, rop, src, sx, sy)
|
|||
tga_dstb = offset + (dy + dst->ri_yorigin) * dst->ri_stride +
|
||||
(dx + dst->ri_xorigin) * (dst->ri_depth/8);
|
||||
|
||||
if (sy >= dy) {
|
||||
ystart = 0;
|
||||
yend = (h - 1) * dst->ri_stride;
|
||||
ydir = 1;
|
||||
} else {
|
||||
ystart = (h - 1) * dst->ri_stride;
|
||||
yend = 0;
|
||||
ydir = -1;
|
||||
}
|
||||
yinc = ydir * dst->ri_stride;
|
||||
|
||||
wb = w * (dst->ri_depth / 8);
|
||||
if (sx >= dx) { /* moving to the left */
|
||||
xstart = 0;
|
||||
xend = wb;
|
||||
xdir = 1;
|
||||
} else { /* moving to the right */
|
||||
xstart = wb;
|
||||
xend = 0;
|
||||
xdir = -1;
|
||||
}
|
||||
|
||||
TGAWALREG(dc, TGA_REG_GMOR, 3, 0x0007); /* Copy mode */
|
||||
TGAWALREG(dc, TGA_REG_GOPR, 3, map_rop[rop]); /* Set up the op */
|
||||
|
||||
|
@ -1118,11 +1114,9 @@ tga_rop_vtov(dst, dx, dy, w, h, rop, src, sx, sy)
|
|||
if (xdir == 1) { /* move to the left */
|
||||
|
||||
for (y = ystart; (ydir * y) <= (ydir * yend); y += yinc) {
|
||||
|
||||
/* 4*64 byte chunks */
|
||||
for (xleft = wb, x = xstart;
|
||||
x <= xend && xleft >= 4*64;
|
||||
x += XINC256, xleft -= XINC256) {
|
||||
for (xleft = wb, x = xstart; xleft >= 4*64;
|
||||
x += 4*64, xleft -= 4*64) {
|
||||
|
||||
/* XXX XXX Eight writes to different addresses should fill
|
||||
* XXX XXX up the write buffers on 21064 and 21164 chips,
|
||||
|
@ -1141,35 +1135,33 @@ tga_rop_vtov(dst, dx, dy, w, h, rop, src, sx, sy)
|
|||
}
|
||||
|
||||
/* 64 byte chunks */
|
||||
for ( ; x <= xend && xleft >= 64;
|
||||
x += XINC64, xleft -= XINC64) {
|
||||
for (; xleft >= 64; x += 64, xleft -= 64) {
|
||||
TGAWALREG(dc, TGA_REG_GCSR, 0, tga_srcb + y + x + 0 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCDR, 0, tga_dstb + y + x + 0 * 64);
|
||||
}
|
||||
lastx = x; lastleft = xleft; /* remember for CPU loop */
|
||||
|
||||
lastx = x; lastleft = xleft; /* remember for CPU loop */
|
||||
}
|
||||
TGAWALREG(dc, TGA_REG_GOPR, 0, 0x0003); /* op -> dst = src */
|
||||
TGAWALREG(dc, TGA_REG_GMOR, 0, 0x0000); /* Simple mode */
|
||||
|
||||
for (y = ystart; (ydir * y) <= (ydir * yend); y += yinc) {
|
||||
/* 4 byte granularity */
|
||||
for (x = lastx, xleft = lastleft;
|
||||
x <= xend && xleft >= 4;
|
||||
x += XINC4, xleft -= XINC4) {
|
||||
*(uint32_t *)(dst->ri_bits + dstb + y + x) =
|
||||
*(uint32_t *)(dst->ri_bits + srcb + y + x);
|
||||
if (lastleft) {
|
||||
for (y = ystart; (ydir * y) <= (ydir * yend); y += yinc) {
|
||||
/* 4 byte granularity */
|
||||
for (x = lastx, xleft = lastleft; xleft >= 4;
|
||||
x += 4, xleft -= 4) {
|
||||
*(uint32_t *)(dst->ri_bits + dstb + y + x + 0 * 4) =
|
||||
*(uint32_t *)(dst->ri_bits + srcb + y + x + 0 * 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else { /* above move to the left, below move to the right */
|
||||
|
||||
for (y = ystart; (ydir * y) <= (ydir * yend); y += yinc) {
|
||||
|
||||
/* 4*64 byte chunks */
|
||||
for (xleft = wb, x = xstart;
|
||||
x >= xend && xleft >= 4*64;
|
||||
x -= XINC256, xleft -= XINC256) {
|
||||
for (xleft = wb, x = xstart; xleft >= 4*64;
|
||||
x -= 4*64, xleft -= 4*64) {
|
||||
|
||||
/* XXX XXX Eight writes to different addresses should fill
|
||||
* XXX XXX up the write buffers on 21064 and 21164 chips,
|
||||
|
@ -1177,37 +1169,35 @@ tga_rop_vtov(dst, dx, dy, w, h, rop, src, sx, sy)
|
|||
* XXX XXX require further unrolling of this loop, or the
|
||||
* XXX XXX insertion of memory barriers.
|
||||
*/
|
||||
TGAWALREG(dc, TGA_REG_GCSR, 0, tga_srcb + y + x + 3 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCDR, 0, tga_dstb + y + x + 3 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCSR, 1, tga_srcb + y + x + 2 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCDR, 1, tga_dstb + y + x + 2 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCSR, 2, tga_srcb + y + x + 1 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCDR, 2, tga_dstb + y + x + 1 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCSR, 3, tga_srcb + y + x + 0 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCDR, 3, tga_dstb + y + x + 0 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCSR, 0, tga_srcb + y + x - 1 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCDR, 0, tga_dstb + y + x - 1 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCSR, 1, tga_srcb + y + x - 2 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCDR, 1, tga_dstb + y + x - 2 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCSR, 2, tga_srcb + y + x - 3 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCDR, 2, tga_dstb + y + x - 3 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCSR, 3, tga_srcb + y + x - 4 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCDR, 3, tga_dstb + y + x - 4 * 64);
|
||||
}
|
||||
|
||||
if (xleft) x += XINC256 - XINC64;
|
||||
|
||||
/* 64 byte chunks */
|
||||
for ( ; x >= xend && xleft >= 64;
|
||||
x -= XINC64, xleft -= XINC64) {
|
||||
TGAWALREG(dc, TGA_REG_GCSR, 0, tga_srcb + y + x + 0 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCDR, 0, tga_dstb + y + x + 0 * 64);
|
||||
for (; xleft >= 64; x -= 64, xleft -= 64) {
|
||||
TGAWALREG(dc, TGA_REG_GCSR, 0, tga_srcb + y + x - 1 * 64);
|
||||
TGAWALREG(dc, TGA_REG_GCDR, 0, tga_dstb + y + x - 1 * 64);
|
||||
}
|
||||
if (xleft) x += XINC64 - XINC4;
|
||||
|
||||
lastx = x; lastleft = xleft; /* remember for CPU loop */
|
||||
}
|
||||
TGAWALREG(dc, TGA_REG_GOPR, 0, 0x0003); /* op -> dst = src */
|
||||
TGAWALREG(dc, TGA_REG_GMOR, 0, 0x0000); /* Simple mode */
|
||||
|
||||
for (y = ystart; (ydir * y) <= (ydir * yend); y += yinc) {
|
||||
/* 4 byte granularity */
|
||||
for (x = lastx, xleft = lastleft;
|
||||
x >= xend && xleft >= 4;
|
||||
x -= XINC4, xleft -= XINC4) {
|
||||
*(uint32_t *)(dst->ri_bits + dstb + y + x) =
|
||||
*(uint32_t *)(dst->ri_bits + srcb + y + x);
|
||||
if (lastleft) {
|
||||
for (y = ystart; (ydir * y) <= (ydir * yend); y += yinc) {
|
||||
/* 4 byte granularity */
|
||||
for (x = lastx, xleft = lastleft; xleft >= 4;
|
||||
x -= 4, xleft -= 4) {
|
||||
*(uint32_t *)(dst->ri_bits + dstb + y + x - 1 * 4) =
|
||||
*(uint32_t *)(dst->ri_bits + srcb + y + x - 1 * 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue