NetBSD/usr.bin/sort/fields.c
heas 2a3d05aa4e Do not step over the edge of the buffer (check for '\0'). This just happens
to not lose on i386 because another buffer appears immediately following.
Regress tests all passed.
2004-03-14 21:12:14 +00:00

372 lines
11 KiB
C

/* $NetBSD: fields.c,v 1.18 2004/03/14 21:12:14 heas Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* Subroutines to generate sort keys. */
#include "sort.h"
#ifndef lint
__RCSID("$NetBSD: fields.c,v 1.18 2004/03/14 21:12:14 heas Exp $");
__SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#define SKIP_BLANKS(ptr) { \
if (BLANK & d_mask[*(ptr)]) \
while (BLANK & d_mask[*(++(ptr))]); \
}
#define NEXTCOL(pos) { \
if (!SEP_FLAG) \
while (BLANK & l_d_mask[*(++pos)]); \
while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\
}
static u_char *enterfield __P((u_char *, u_char *, struct field *, int));
static u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
#define DECIMAL '.'
#define OFFSET 128
u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
u_char fnum[NBINS], rnum[NBINS];
/*
* constructs sort key with leading recheader, followed by the key,
* followed by the original line.
*/
length_t
enterkey(keybuf, line, size, fieldtable)
RECHEADER *keybuf; /* pointer to start of key */
DBT *line;
int size;
struct field fieldtable[];
{
int i;
u_char *l_d_mask;
u_char *lineend, *pos;
u_char *endkey, *keypos;
struct coldesc *clpos;
int col = 1;
struct field *ftpos;
l_d_mask = d_mask;
pos = (u_char *) line->data - 1;
lineend = (u_char *) line->data + line->size-1;
/* don't include rec_delimiter */
for (i = 0; i < ncols; i++) {
clpos = clist + i;
for (; (col < clpos->num) && (pos < lineend); col++) {
NEXTCOL(pos);
}
if (pos >= lineend)
break;
clpos->start = SEP_FLAG ? pos + 1 : pos;
NEXTCOL(pos);
clpos->end = pos;
col++;
if (pos >= lineend) {
clpos->end = lineend;
i++;
break;
}
}
for (; i <= ncols; i++)
clist[i].start = clist[i].end = lineend;
if (clist[0].start < (u_char *) line->data)
clist[0].start++;
keypos = keybuf->data;
endkey = (u_char *) keybuf + size - line->size;
for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
if ((keypos = enterfield(keypos, endkey, ftpos,
fieldtable->flags)) == NULL)
return (1);
keybuf->offset = keypos - keybuf->data;
keybuf->length = keybuf->offset + line->size;
if (keybuf->length + sizeof(TRECHEADER) > size) {
/* line too long for buffer */
return (1);
}
/*
* Make [s]radixsort() only sort by relevant part of key if:
* 1. we want to choose unique items by relevant field[s]
* 2. we want stable sort and so the items should be sorted only by
* the relevant field[s]
*/
if (UNIQUE || (stable_sort && keybuf->offset < line->size))
keypos[-1] = REC_D;
memcpy(keybuf->data + keybuf->offset, line->data, line->size);
return (0);
}
/*
* constructs a field (as defined by -k) within a key
*/
static u_char *
enterfield(tablepos, endkey, cur_fld, gflags)
struct field *cur_fld;
u_char *tablepos, *endkey;
int gflags;
{
u_char *start, *end, *lineend, *mask, *lweight;
struct column icol, tcol;
u_int flags;
u_int Rflag;
icol = cur_fld->icol;
tcol = cur_fld->tcol;
flags = cur_fld->flags;
start = icol.p->start;
lineend = clist[ncols].end;
if (flags & BI)
SKIP_BLANKS(start);
start += icol.indent;
start = min(start, lineend);
if (!tcol.num)
end = lineend;
else {
if (tcol.indent) {
end = tcol.p->start;
if (flags & BT)
SKIP_BLANKS(end);
end += tcol.indent;
end = min(end, lineend);
} else
end = tcol.p->end;
}
if (flags & N) {
Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
return number(tablepos, endkey, start, end, Rflag);
}
mask = cur_fld->mask;
lweight = cur_fld->weights;
for (; start < end; start++)
if (mask[*start]) {
if (*start <= 1) {
if (tablepos+2 >= endkey)
return (NULL);
*tablepos++ = lweight[1];
*tablepos++ = lweight[*start ? 2 : 1];
} else {
if (tablepos+1 >= endkey)
return (NULL);
*tablepos++ = lweight[*start];
}
}
*tablepos++ = lweight[0];
return (tablepos == endkey ? NULL : tablepos);
}
/* Uses the first bin to assign sign, expsign, 0, and the first
* 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
* When sorting in forward order:
* use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
* else use (0-99)->(2-102).
* If the exponent is >=61, use another byte for each additional 253
* in the exponent. Cutoff is at 567.
* To avoid confusing the exponent and the mantissa, use a field delimiter
* if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
* only time a field delimiter can come in that position.
* Reverse order is done analagously.
*/
static u_char *
number(pos, bufend, line, lineend, Rflag)
u_char *line, *pos, *bufend, *lineend;
int Rflag;
{
int or_sign, parity = 0;
int expincr = 1, exponent = -1;
int bite, expsign = 1, sign = 1, zeroskip = 0;
u_char lastvalue='0', *nonzero=NULL, *tline, *C_TENS;
u_char *nweights;
if (Rflag)
nweights = rnum;
else
nweights = fnum;
if (pos > bufend - 8)
return (NULL);
/*
* or_sign sets the sort direction:
* (-r: +/-)(sign: +/-)(expsign: +/-)
*/
or_sign = sign ^ expsign ^ Rflag;
SKIP_BLANKS(line);
if (*line == '-') { /* set the sign */
or_sign ^= 1;
sign = 0;
line++;
}
/* eat initial zeroes */
for (; *line == '0' && line < lineend; line++)
zeroskip = 1;
/* calculate exponents < 0 */
if (*line == DECIMAL) {
exponent = 1;
while (*++line == '0' && line < lineend)
exponent++;
expincr = 0;
expsign = 0;
}
/* next character better be a digit */
if (*line < '1' || *line > '9' || line >= lineend) {
/* only exit if we didn't skip any zero number */
if (!zeroskip) {
*pos++ = nweights[127];
return (pos);
}
}
if (expincr) {
for (tline = line-1; *++tline >= '0' &&
*tline <= '9' && tline < lineend;)
exponent++;
}
if (exponent > 567) {
*pos++ = nweights[sign ? (expsign ? 254 : 128)
: (expsign ? 0 : 126)];
warnx("exponent out of bounds");
return (pos);
}
bite = min(exponent, 61);
*pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
: (expsign ? 64-bite : 64+bite)];
if (bite >= 61) {
do {
exponent -= bite;
bite = min(exponent, 254);
*pos++ = nweights[or_sign ? 254-bite : bite];
} while (bite == 254);
}
C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
for (; line < lineend; line++) {
if (*line >= '0' && *line <= '9') {
if (parity) {
*pos++ = C_TENS[lastvalue] + (or_sign ? - *line
: *line);
if (pos == bufend)
return (NULL);
if (*line != '0' || lastvalue != '0')
nonzero = pos;
} else
lastvalue = *line;
parity ^= 1;
} else if (*line == DECIMAL) {
if (!expincr) /* a decimal already occurred once */
break;
expincr = 0;
} else
break;
}
if ((parity && lastvalue != '0') || !nonzero) {
*pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
OFF_TENS[lastvalue] + '0';
} else
pos = nonzero;
if (pos > bufend-1)
return (NULL);
*pos++ = or_sign ? nweights[254] : nweights[0];
return (pos);
}
/* This forces a gap around the record delimiter
* Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
* rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
*/
void
num_init()
{
int i;
TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
OFF_TENS = TENS - '0';
OFF_NTENS = NEGTENS - '0';
for (i = 1; i < 10; i++) {
TENS[i] = TENS[i - 1] + 10;
NEGTENS[i] = NEGTENS[i - 1] - 10;
}
for (i = 0; i < REC_D; i++) {
fnum[i] = i;
rnum[255 - i] = i;
}
for (i = REC_D; i <255; i++) {
fnum[i] = i + 1;
rnum[255 - i] = i - 1;
}
}