Simplify the blitting code slightly, and fix fenceposts.
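
The fencepost was in the loop bounds: with yend = h * ri_stride and the
inclusive test (ydir * y) <= (ydir * yend), the blit visited h + 1 rows.
The bounds are now expressed as (h - 1) * ri_stride, the offset of the
last row itself. The x loops likewise drop the redundant x <= xend /
x >= xend tests and the XINC* macros, counting down the remaining bytes
in xleft alone. A minimal standalone sketch of the row-count fix, using
simplified, hypothetical names (H, STRIDE) rather than the driver code:

#include <stdio.h>

#define H	4		/* rows to blit (hypothetical) */
#define STRIDE	1280		/* bytes per scanline (hypothetical) */

int
main(void)
{
	int y, yend, rows;

	/* Old bounds: yend = H * STRIDE.  The inclusive test admits
	 * y == H * STRIDE, so the body runs H + 1 times. */
	rows = 0;
	yend = H * STRIDE;
	for (y = 0; y <= yend; y += STRIDE)
		rows++;
	printf("old: %d rows (wanted %d)\n", rows, H);

	/* New bounds: yend = (H - 1) * STRIDE is the offset of the last
	 * row itself; the same inclusive test runs exactly H times. */
	rows = 0;
	yend = (H - 1) * STRIDE;
	for (y = 0; y <= yend; y += STRIDE)
		rows++;
	printf("new: %d rows (wanted %d)\n", rows, H);
	return 0;
}

The reversed (bottom-up) direction gets the matching fix: ystart now
begins at (h - 1) * ri_stride rather than h * ri_stride.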

mycroft 2002-09-16 16:33:13 +00:00
parent 4ae80dbd09
commit 6db8861212
1 changed file with 58 additions and 68 deletions

tga.c

@@ -1,4 +1,4 @@
-/* $NetBSD: tga.c,v 1.42 2002/07/04 14:37:11 junyoung Exp $ */
+/* $NetBSD: tga.c,v 1.43 2002/09/16 16:33:13 mycroft Exp $ */
 /*
  * Copyright (c) 1995, 1996 Carnegie-Mellon University.
@@ -28,7 +28,7 @@
  */
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tga.c,v 1.42 2002/07/04 14:37:11 junyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tga.c,v 1.43 2002/09/16 16:33:13 mycroft Exp $");
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -1072,32 +1072,6 @@ tga_rop_vtov(dst, dx, dy, w, h, rop, src, sx, sy)
 		return -1;
 	}
-	wb = w * (dst->ri_depth / 8);
-	if (sy >= dy) {
-		ystart = 0;
-		yend = h;
-		ydir = 1;
-	} else {
-		ystart = h;
-		yend = 0;
-		ydir = -1;
-	}
-	if (sx >= dx) { /* moving to the left */
-		xstart = 0;
-		xend = w * (dst->ri_depth / 8) - 4;
-		xdir = 1;
-	} else { /* moving to the right */
-		xstart = wb - ( wb >= 4*64 ? 4*64 : wb >= 64 ? 64 : 4 );
-		xend = 0;
-		xdir = -1;
-	}
-#define XINC4 4
-#define XINC64 64
-#define XINC256 (64*4)
-	yinc = ydir * dst->ri_stride;
-	ystart *= dst->ri_stride;
-	yend *= dst->ri_stride;
 	srcb = sy * src->ri_stride + sx * (src->ri_depth/8);
 	dstb = dy * dst->ri_stride + dx * (dst->ri_depth/8);
 	tga_srcb = offset + (sy + src->ri_yorigin) * src->ri_stride +
@@ -1105,6 +1079,28 @@ tga_rop_vtov(dst, dx, dy, w, h, rop, src, sx, sy)
 	tga_dstb = offset + (dy + dst->ri_yorigin) * dst->ri_stride +
 	    (dx + dst->ri_xorigin) * (dst->ri_depth/8);
+	if (sy >= dy) {
+		ystart = 0;
+		yend = (h - 1) * dst->ri_stride;
+		ydir = 1;
+	} else {
+		ystart = (h - 1) * dst->ri_stride;
+		yend = 0;
+		ydir = -1;
+	}
+	yinc = ydir * dst->ri_stride;
+	wb = w * (dst->ri_depth / 8);
+	if (sx >= dx) { /* moving to the left */
+		xstart = 0;
+		xend = wb;
+		xdir = 1;
+	} else { /* moving to the right */
+		xstart = wb;
+		xend = 0;
+		xdir = -1;
+	}
 	TGAWALREG(dc, TGA_REG_GMOR, 3, 0x0007); /* Copy mode */
 	TGAWALREG(dc, TGA_REG_GOPR, 3, map_rop[rop]); /* Set up the op */
@@ -1118,11 +1114,9 @@ tga_rop_vtov(dst, dx, dy, w, h, rop, src, sx, sy)
 	if (xdir == 1) { /* move to the left */
 		for (y = ystart; (ydir * y) <= (ydir * yend); y += yinc) {
 			/* 4*64 byte chunks */
-			for (xleft = wb, x = xstart;
-			    x <= xend && xleft >= 4*64;
-			    x += XINC256, xleft -= XINC256) {
+			for (xleft = wb, x = xstart; xleft >= 4*64;
+			    x += 4*64, xleft -= 4*64) {
 				/* XXX XXX Eight writes to different addresses should fill
 				 * XXX XXX up the write buffers on 21064 and 21164 chips,
@@ -1141,35 +1135,33 @@ tga_rop_vtov(dst, dx, dy, w, h, rop, src, sx, sy)
 			}
 			/* 64 byte chunks */
-			for ( ; x <= xend && xleft >= 64;
-			    x += XINC64, xleft -= XINC64) {
+			for (; xleft >= 64; x += 64, xleft -= 64) {
 				TGAWALREG(dc, TGA_REG_GCSR, 0, tga_srcb + y + x + 0 * 64);
 				TGAWALREG(dc, TGA_REG_GCDR, 0, tga_dstb + y + x + 0 * 64);
 			}
-			lastx = x; lastleft = xleft; /* remember for CPU loop */
+			lastx = x; lastleft = xleft; /* remember for CPU loop */
 		}
 		TGAWALREG(dc, TGA_REG_GOPR, 0, 0x0003); /* op -> dst = src */
 		TGAWALREG(dc, TGA_REG_GMOR, 0, 0x0000); /* Simple mode */
 		if (lastleft) {
 			for (y = ystart; (ydir * y) <= (ydir * yend); y += yinc) {
 				/* 4 byte granularity */
-				for (x = lastx, xleft = lastleft;
-				    x <= xend && xleft >= 4;
-				    x += XINC4, xleft -= XINC4) {
-					*(uint32_t *)(dst->ri_bits + dstb + y + x) =
-					    *(uint32_t *)(dst->ri_bits + srcb + y + x);
+				for (x = lastx, xleft = lastleft; xleft >= 4;
+				    x += 4, xleft -= 4) {
+					*(uint32_t *)(dst->ri_bits + dstb + y + x + 0 * 4) =
+					    *(uint32_t *)(dst->ri_bits + srcb + y + x + 0 * 4);
 				}
 			}
 		}
 	}
 	else { /* above move to the left, below move to the right */
 		for (y = ystart; (ydir * y) <= (ydir * yend); y += yinc) {
 			/* 4*64 byte chunks */
-			for (xleft = wb, x = xstart;
-			    x >= xend && xleft >= 4*64;
-			    x -= XINC256, xleft -= XINC256) {
+			for (xleft = wb, x = xstart; xleft >= 4*64;
+			    x -= 4*64, xleft -= 4*64) {
 				/* XXX XXX Eight writes to different addresses should fill
 				 * XXX XXX up the write buffers on 21064 and 21164 chips,
@@ -1177,37 +1169,35 @@ tga_rop_vtov(dst, dx, dy, w, h, rop, src, sx, sy)
 				 * XXX XXX require further unrolling of this loop, or the
 				 * XXX XXX insertion of memory barriers.
 				 */
-				TGAWALREG(dc, TGA_REG_GCSR, 0, tga_srcb + y + x + 3 * 64);
-				TGAWALREG(dc, TGA_REG_GCDR, 0, tga_dstb + y + x + 3 * 64);
-				TGAWALREG(dc, TGA_REG_GCSR, 1, tga_srcb + y + x + 2 * 64);
-				TGAWALREG(dc, TGA_REG_GCDR, 1, tga_dstb + y + x + 2 * 64);
-				TGAWALREG(dc, TGA_REG_GCSR, 2, tga_srcb + y + x + 1 * 64);
-				TGAWALREG(dc, TGA_REG_GCDR, 2, tga_dstb + y + x + 1 * 64);
-				TGAWALREG(dc, TGA_REG_GCSR, 3, tga_srcb + y + x + 0 * 64);
-				TGAWALREG(dc, TGA_REG_GCDR, 3, tga_dstb + y + x + 0 * 64);
+				TGAWALREG(dc, TGA_REG_GCSR, 0, tga_srcb + y + x - 1 * 64);
+				TGAWALREG(dc, TGA_REG_GCDR, 0, tga_dstb + y + x - 1 * 64);
+				TGAWALREG(dc, TGA_REG_GCSR, 1, tga_srcb + y + x - 2 * 64);
+				TGAWALREG(dc, TGA_REG_GCDR, 1, tga_dstb + y + x - 2 * 64);
+				TGAWALREG(dc, TGA_REG_GCSR, 2, tga_srcb + y + x - 3 * 64);
+				TGAWALREG(dc, TGA_REG_GCDR, 2, tga_dstb + y + x - 3 * 64);
+				TGAWALREG(dc, TGA_REG_GCSR, 3, tga_srcb + y + x - 4 * 64);
+				TGAWALREG(dc, TGA_REG_GCDR, 3, tga_dstb + y + x - 4 * 64);
 			}
-			if (xleft) x += XINC256 - XINC64;
 			/* 64 byte chunks */
-			for ( ; x >= xend && xleft >= 64;
-			    x -= XINC64, xleft -= XINC64) {
-				TGAWALREG(dc, TGA_REG_GCSR, 0, tga_srcb + y + x + 0 * 64);
-				TGAWALREG(dc, TGA_REG_GCDR, 0, tga_dstb + y + x + 0 * 64);
+			for (; xleft >= 64; x -= 64, xleft -= 64) {
+				TGAWALREG(dc, TGA_REG_GCSR, 0, tga_srcb + y + x - 1 * 64);
+				TGAWALREG(dc, TGA_REG_GCDR, 0, tga_dstb + y + x - 1 * 64);
 			}
-			if (xleft) x += XINC64 - XINC4;
 			lastx = x; lastleft = xleft; /* remember for CPU loop */
 		}
 		TGAWALREG(dc, TGA_REG_GOPR, 0, 0x0003); /* op -> dst = src */
 		TGAWALREG(dc, TGA_REG_GMOR, 0, 0x0000); /* Simple mode */
 		if (lastleft) {
 			for (y = ystart; (ydir * y) <= (ydir * yend); y += yinc) {
 				/* 4 byte granularity */
-				for (x = lastx, xleft = lastleft;
-				    x >= xend && xleft >= 4;
-				    x -= XINC4, xleft -= XINC4) {
-					*(uint32_t *)(dst->ri_bits + dstb + y + x) =
-					    *(uint32_t *)(dst->ri_bits + srcb + y + x);
+				for (x = lastx, xleft = lastleft; xleft >= 4;
+				    x -= 4, xleft -= 4) {
+					*(uint32_t *)(dst->ri_bits + dstb + y + x - 1 * 4) =
+					    *(uint32_t *)(dst->ri_bits + srcb + y + x - 1 * 4);
 				}
 			}
 		}
 	}
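
A note on the simplified right-moving branch: x is now the exclusive end
of the not-yet-copied span, and each chunk is addressed with negative
offsets (x - 1 * 64 through x - 4 * 64), so the old
if (xleft) x += XINC256 - XINC64; adjustments between chunk sizes fall
away. A standalone sketch of that pattern, with hypothetical names and
non-overlapping buffers; it mirrors the loop structure above but is not
the driver code:

#include <string.h>

/*
 * Copy wb bytes (assumed a multiple of 4, as in the driver) from src
 * to dst, walking right to left.  x is the exclusive end of the bytes
 * not yet copied, so each tier resumes from x unchanged.  (x and xleft
 * coincide here; the driver keeps both because its left-moving branch
 * walks x upward from 0 while xleft still counts down.)
 */
static void
copy_backward(unsigned char *dst, const unsigned char *src, int wb)
{
	int x = wb, xleft = wb;

	/* 64 byte chunks */
	for (; xleft >= 64; x -= 64, xleft -= 64)
		memcpy(dst + x - 64, src + x - 64, 64);
	/* 4 byte granularity */
	for (; xleft >= 4; x -= 4, xleft -= 4)
		memcpy(dst + x - 4, src + x - 4, 4);
}

With x exclusive, every tier shares the invariant that bytes [0, x)
remain uncopied, which is what lets each loop condition collapse to a
test on xleft alone.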