/* NetBSD/usr.bin/sort/fields.c (363 lines, 11 KiB, C) */

/* $NetBSD: fields.c,v 1.24 2009/08/20 06:36:25 dsl Exp $ */

/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Ben Harris and Jaromir Dolecek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* Subroutines to generate sort keys. */
#include "sort.h"
#ifndef lint
__RCSID("$NetBSD: fields.c,v 1.24 2009/08/20 06:36:25 dsl Exp $");
__SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
/*
 * SKIP_BLANKS(ptr): advance the pointer lvalue 'ptr' until it addresses a
 * byte that is not flagged BLANK in d_mask[].  'ptr' is left unchanged when
 * it already addresses such a byte.  'ptr' is evaluated more than once, so
 * it must be a plain lvalue without side effects.
 *
 * The body is wrapped in do { } while (0) so that "SKIP_BLANKS(p);" is a
 * single statement and stays correct as the body of an unbraced if/else.
 */
#define SKIP_BLANKS(ptr) do {						\
	while (BLANK & d_mask[*(ptr)])					\
		(ptr)++;						\
} while (0)
/*
 * NEXTCOL(pos): move the pointer lvalue 'pos' forward to the delimiter that
 * ends the next column.
 *
 * When SEP_FLAG is zero, 'pos' is first pre-incremented past any bytes
 * flagged BLANK.  Then 'pos' is pre-incremented until it addresses a byte
 * flagged FLD_D or REC_D_F, or until the byte following 'pos' is '\0'.
 *
 * The expansion reads the mask table through the identifier 'l_d_mask',
 * which must be in scope at the point of use.  'pos' is evaluated more
 * than once, so it must be a plain lvalue without side effects.
 *
 * Wrapped in do { } while (0) so that "NEXTCOL(p);" is one statement.
 */
#define NEXTCOL(pos) do {						\
	if (!SEP_FLAG) {						\
		while (BLANK & l_d_mask[*(++(pos))])			\
			continue;					\
	}								\
	while ((*((pos) + 1) != '\0') &&				\
	    !((FLD_D | REC_D_F) & l_d_mask[*++(pos)]))			\
		continue;						\
} while (0)
/*
 * File-local helpers.  Each appends key bytes at its first argument and
 * returns the advanced key pointer, or NULL when the key space runs out.
 */
static u_char *enterfield(u_char *tablepos, const u_char *endkey,
    struct field *cur_fld, int gflags);
static u_char *number(u_char *pos, const u_char *bufend, u_char *line,
    u_char *lineend, int Rflag);
/* Radix character recognised by number() between integer and fraction. */
#define DECIMAL '.'
/* NOTE(review): OFFSET is not referenced by any code shown in this file. */
#define OFFSET 128
/*
 * Lookup tables for numeric keys; all of them are filled in by num_init()
 * and read by number().
 *
 * TENS/NEGTENS hold one base value per leading decimal digit; number()
 * indexes them with the digit *character* through OFF_TENS/OFF_NTENS and
 * then adds (or subtracts) the second digit character of the pair.
 */
u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 - '0'; each next entry is 10 more */
u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 254 + '0'; each next entry is 10 less */
u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
/* Weight tables selected by number(): fnum when Rflag is zero, rnum otherwise. */
u_char fnum[NBINS], rnum[NBINS];
/*
 * Build one sort record in 'keybuf': the record header, then the
 * concatenated sort keys, then a copy of the original line.
 *
 *	keybuf		record being filled in; keys are written from
 *			keybuf->data onwards
 *	keybuf_end	end of the space available for this record
 *	line_data	the input line
 *	line_size	number of bytes in the line; the last one is treated
 *			as the record delimiter and is not part of any column
 *	fieldtable	fieldtable[0].flags supplies the global flags;
 *			fieldtable[1..] are the keys, ended by an entry whose
 *			icol.num is zero
 *
 * Returns 0 on success, 1 when the keys plus the line do not fit.
 */
length_t
enterkey(RECHEADER *keybuf, const u_char *keybuf_end, u_char *line_data,
    size_t line_size, struct field fieldtable[])
{
	u_char *l_d_mask = d_mask;	/* NEXTCOL() expects this exact name */
	u_char *cursor = line_data - 1;
	u_char *last = line_data + line_size - 1; /* the record delimiter */
	const u_char *key_limit;
	u_char *key_out;
	struct field *fld;
	int colno = 1;
	int idx = 0;

	/* Record where every column named in clist[] starts and ends. */
	while (idx < ncols) {
		struct coldesc *cd = &clist[idx];

		/* Step over the columns that nobody asked for. */
		while (colno < cd->num && cursor < last) {
			NEXTCOL(cursor);
			colno++;
		}
		if (cursor >= last)
			break;

		cd->start = SEP_FLAG ? cursor + 1 : cursor;
		NEXTCOL(cursor);
		cd->end = cursor;
		colno++;

		if (cursor >= last) {
			/* The line ended inside this column. */
			cd->end = last;
			idx++;
			break;
		}
		idx++;
	}

	/* Columns past the end of the line are empty; so is clist[ncols]. */
	while (idx <= ncols) {
		clist[idx].start = clist[idx].end = last;
		idx++;
	}

	/* The scan began one byte before the line; undo that for column 0. */
	if (clist[0].start < line_data)
		clist[0].start++;

	/*
	 * The record data is the sort keys (concatenated) followed by the
	 * original line (kept for output).
	 * keybuf->offset counts the key bytes, keybuf->length the key bytes
	 * plus the line bytes.
	 * A record separator (usually \n) follows the keys in case
	 * (as is usual) the order of equal lines has to be preserved,
	 * and for 'sort -u'.
	 */
	key_out = keybuf->data;
	key_limit = keybuf_end - line_size - 1;
	if (key_limit <= key_out) {
		/* No room for any key bytes */
		return (1);
	}

	for (fld = fieldtable + 1; fld->icol.num != 0; fld++) {
		key_out = enterfield(key_out, key_limit, fld,
		    fieldtable->flags);
		if (key_out == NULL)
			return (1);
	}

	*key_out++ = REC_D;
	keybuf->offset = key_out - keybuf->data;
	keybuf->length = keybuf->offset + line_size;
	memcpy(key_out, line_data, line_size);

	return (0);
}
/*
 * Append the key bytes for one field (as defined by -k) to the key.
 *
 *	tablepos	where the next key byte goes
 *	endkey		end of the space available for key bytes
 *	cur_fld		the field: start/end columns, flags, mask and weights
 *	gflags		global flags; only R is looked at, and only for
 *			numeric fields
 *
 * Returns the position after the bytes written, or NULL when the key
 * space is used up.
 */
static u_char *
enterfield(u_char *tablepos, const u_char *endkey, struct field *cur_fld,
    int gflags)
{
	struct column from = cur_fld->icol;
	struct column to = cur_fld->tcol;
	u_int fflags = cur_fld->flags;
	u_char *eol = clist[ncols].end;
	u_char *src, *stop;
	u_char *keep, *weights;

	/* Where the field starts: column start, optional blanks, offset. */
	src = from.p->start;
	if (fflags & BI) {
		SKIP_BLANKS(src);
	}
	src += from.indent;
	src = min(src, eol);

	/* Where the field stops. */
	if (to.num == 0) {
		stop = eol;
	} else if (to.indent == 0) {
		stop = to.p->end;
	} else {
		stop = to.p->start;
		if (fflags & BT) {
			SKIP_BLANKS(stop);
		}
		stop += to.indent;
		stop = min(stop, eol);
	}

	/* Numeric fields are encoded by number(). */
	if (fflags & N) {
		int reversed = ((gflags & R) ^ (fflags & R)) != 0;

		return number(tablepos, endkey, src, stop, reversed);
	}

	keep = cur_fld->mask;
	weights = cur_fld->weights;
	while (src < stop) {
		u_char ch = *src++;

		if (!keep[ch])
			continue;	/* byte is ignored for this field */

		if (ch > 1) {
			if (tablepos + 1 >= endkey)
				return (NULL);
			*tablepos++ = weights[ch];
		} else {
			/* Input bytes 0 and 1 are written as two key bytes. */
			if (tablepos + 2 >= endkey)
				return (NULL);
			*tablepos++ = weights[1];
			*tablepos++ = weights[ch ? 2 : 1];
		}
	}

	/* weights[0] marks the end of the field. */
	*tablepos++ = weights[0];
	if (tablepos == endkey)
		return (NULL);
	return (tablepos);
}
/* Uses the first bin to assign sign, expsign, 0, and the first
* 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
* When sorting in forward order:
* use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
* else use (0-99)->(2-102).
* If the exponent is >=61, use another byte for each additional 253
* in the exponent. Cutoff is at 567.
* To avoid confusing the exponent and the mantissa, use a field delimiter
* if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
* only time a field delimiter can come in that position.
* Reverse order is done analagously.
*/
2000-10-07 20:39:34 +04:00
static u_char *
number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend, int Rflag)
2000-10-07 20:39:34 +04:00
{
2000-10-16 00:46:33 +04:00
int or_sign, parity = 0;
int expincr = 1, exponent = -1;
int bite, expsign = 1, sign = 1, zeroskip = 0;
2004-02-15 14:54:17 +03:00
u_char lastvalue='0', *nonzero=NULL, *tline, *C_TENS;
2000-10-07 20:39:34 +04:00
u_char *nweights;
if (Rflag)
nweights = rnum;
else
nweights = fnum;
if (pos > bufend - 8)
return (NULL);
/*
* or_sign sets the sort direction:
* (-r: +/-)(sign: +/-)(expsign: +/-)
*/
2000-10-07 20:39:34 +04:00
or_sign = sign ^ expsign ^ Rflag;
SKIP_BLANKS(line);
2000-10-07 20:39:34 +04:00
if (*line == '-') { /* set the sign */
or_sign ^= 1;
sign = 0;
line++;
}
/* eat initial zeroes */
for (; *line == '0' && line < lineend; line++)
zeroskip = 1;
2000-10-07 20:39:34 +04:00
/* calculate exponents < 0 */
if (*line == DECIMAL) {
exponent = 1;
while (*++line == '0' && line < lineend)
exponent++;
expincr = 0;
expsign = 0;
}
/* next character better be a digit */
if (*line < '1' || *line > '9' || line >= lineend) {
/* only exit if we didn't skip any zero number */
if (!zeroskip) {
*pos++ = nweights[127];
return (pos);
}
2000-10-07 20:39:34 +04:00
}
if (expincr) {
for (tline = line-1; *++tline >= '0' &&
*tline <= '9' && tline < lineend;)
exponent++;
}
if (exponent > 567) {
*pos++ = nweights[sign ? (expsign ? 254 : 128)
: (expsign ? 0 : 126)];
warnx("exponent out of bounds");
return (pos);
}
bite = min(exponent, 61);
*pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
: (expsign ? 64-bite : 64+bite)];
if (bite >= 61) {
do {
exponent -= bite;
bite = min(exponent, 254);
*pos++ = nweights[or_sign ? 254-bite : bite];
} while (bite == 254);
}
C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
for (; line < lineend; line++) {
if (*line >= '0' && *line <= '9') {
if (parity) {
*pos++ = C_TENS[lastvalue] + (or_sign ? - *line
: *line);
if (pos == bufend)
return (NULL);
if (*line != '0' || lastvalue != '0')
nonzero = pos;
} else
lastvalue = *line;
parity ^= 1;
2003-10-18 07:03:20 +04:00
} else if (*line == DECIMAL) {
if (!expincr) /* a decimal already occurred once */
2000-10-07 20:39:34 +04:00
break;
expincr = 0;
} else
break;
}
if ((parity && lastvalue != '0') || !nonzero) {
2000-10-07 20:39:34 +04:00
*pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
OFF_TENS[lastvalue] + '0';
} else
pos = nonzero;
if (pos > bufend-1)
return (NULL);
*pos++ = or_sign ? nweights[254] : nweights[0];
return (pos);
}
/* This forces a gap around the record delimiter
* Thus fnum has values over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
2000-10-07 20:39:34 +04:00
* rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
*/
2000-10-07 20:39:34 +04:00
void
num_init(void)
2000-10-07 20:39:34 +04:00
{
int i;
TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
OFF_TENS = TENS - '0';
OFF_NTENS = NEGTENS - '0';
for (i = 1; i < 10; i++) {
TENS[i] = TENS[i - 1] + 10;
NEGTENS[i] = NEGTENS[i - 1] - 10;
2000-10-07 20:39:34 +04:00
}
for (i = 0; i < REC_D; i++) {
fnum[i] = i;
rnum[255 - i] = i;
2000-10-07 20:39:34 +04:00
}
for (i = REC_D; i <255; i++) {
fnum[i] = i + 1;
rnum[255 - i] = i - 1;
2000-10-07 20:39:34 +04:00
}
}