2009-08-20 10:36:25 +04:00
|
|
|
/* $NetBSD: fields.c,v 1.24 2009/08/20 06:36:25 dsl Exp $ */
|
2003-08-07 15:32:34 +04:00
|
|
|
|
|
|
|
/*-
|
|
|
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software contributed to The NetBSD Foundation
|
|
|
|
* by Ben Harris and Jaromir Dolecek.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
|
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
2000-10-07 22:37:09 +04:00
|
|
|
|
2000-10-07 20:39:34 +04:00
|
|
|
/*-
|
|
|
|
* Copyright (c) 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software contributed to Berkeley by
|
|
|
|
* Peter McIlroy.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2003-08-07 15:13:06 +04:00
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
2000-10-07 20:39:34 +04:00
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Subroutines to generate sort keys. */
|
|
|
|
|
|
|
|
#include "sort.h"
|
|
|
|
|
2000-10-07 22:37:09 +04:00
|
|
|
#ifndef lint
|
2009-08-20 10:36:25 +04:00
|
|
|
__RCSID("$NetBSD: fields.c,v 1.24 2009/08/20 06:36:25 dsl Exp $");
|
2000-10-07 22:37:09 +04:00
|
|
|
__SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93");
|
|
|
|
#endif /* not lint */
|
|
|
|
|
2003-04-09 13:30:40 +04:00
|
|
|
/*
 * Advance `ptr' past a leading run of characters whose d_mask[] entry has
 * the BLANK bit set.  If *ptr is not such a character, `ptr' is left alone.
 *
 * `ptr' must be a modifiable lvalue of type u_char *; it is evaluated more
 * than once.  The body is wrapped in do { } while (0) so that the macro
 * behaves as a single statement (e.g. inside an unbraced if/else).
 */
#define SKIP_BLANKS(ptr) do {					\
	if (BLANK & d_mask[*(ptr)])				\
		while (BLANK & d_mask[*(++(ptr))])		\
			continue;				\
} while (0)
|
|
|
|
|
|
|
|
/*
 * Advance `pos' to the end of the next column.
 *
 * When SEP_FLAG is zero, `pos' is first pre-incremented past any characters
 * carrying the BLANK bit.  In all cases `pos' is then pre-incremented until
 * it rests on a character carrying FLD_D or REC_D_F, or until the character
 * after `pos' is NUL.
 *
 * The macro reads the mask table through a variable named `l_d_mask' that
 * must be in scope at the point of use.  `pos' must be a modifiable
 * u_char * lvalue and is evaluated more than once.  The body is wrapped in
 * do { } while (0) so that it expands to exactly one statement.
 */
#define NEXTCOL(pos) do {					\
	if (!SEP_FLAG)						\
		while (BLANK & l_d_mask[*(++(pos))])		\
			continue;				\
	while ((*((pos) + 1) != '\0') &&			\
	    !((FLD_D | REC_D_F) & l_d_mask[*++(pos)]))		\
		continue;					\
} while (0)
|
|
|
|
|
2009-08-15 22:40:01 +04:00
|
|
|
static u_char *enterfield(u_char *, const u_char *, struct field *, int);
|
|
|
|
static u_char *number(u_char *, const u_char *, u_char *, u_char *, int);
|
2000-10-07 20:39:34 +04:00
|
|
|
|
|
|
|
#define DECIMAL '.'
|
|
|
|
#define OFFSET 128
|
|
|
|
|
|
|
|
u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
|
|
|
|
u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
|
|
|
|
u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
|
|
|
|
u_char fnum[NBINS], rnum[NBINS];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* constructs sort key with leading recheader, followed by the key,
|
|
|
|
* followed by the original line.
|
|
|
|
*/
|
|
|
|
length_t
|
2009-08-20 10:36:25 +04:00
|
|
|
enterkey(RECHEADER *keybuf, const u_char *keybuf_end, u_char *line_data,
|
|
|
|
size_t line_size, struct field fieldtable[])
|
2009-08-15 13:48:46 +04:00
|
|
|
/* keybuf: pointer to start of key */
|
2000-10-07 20:39:34 +04:00
|
|
|
{
|
|
|
|
int i;
|
2000-10-16 00:46:33 +04:00
|
|
|
u_char *l_d_mask;
|
|
|
|
u_char *lineend, *pos;
|
2009-08-15 22:40:01 +04:00
|
|
|
const u_char *endkey;
|
|
|
|
u_char *keypos;
|
2000-10-16 00:46:33 +04:00
|
|
|
struct coldesc *clpos;
|
|
|
|
int col = 1;
|
2000-10-07 20:39:34 +04:00
|
|
|
struct field *ftpos;
|
2009-08-15 22:40:01 +04:00
|
|
|
|
2000-10-07 20:39:34 +04:00
|
|
|
l_d_mask = d_mask;
|
2009-08-15 22:40:01 +04:00
|
|
|
pos = line_data - 1;
|
|
|
|
lineend = line_data + line_size-1;
|
2000-10-07 20:39:34 +04:00
|
|
|
/* don't include rec_delimiter */
|
|
|
|
|
2001-01-13 22:04:21 +03:00
|
|
|
for (i = 0; i < ncols; i++) {
|
2001-01-13 22:01:14 +03:00
|
|
|
clpos = clist + i;
|
2001-02-19 23:50:17 +03:00
|
|
|
for (; (col < clpos->num) && (pos < lineend); col++) {
|
|
|
|
NEXTCOL(pos);
|
|
|
|
}
|
2000-10-07 20:39:34 +04:00
|
|
|
if (pos >= lineend)
|
|
|
|
break;
|
|
|
|
clpos->start = SEP_FLAG ? pos + 1 : pos;
|
|
|
|
NEXTCOL(pos);
|
|
|
|
clpos->end = pos;
|
|
|
|
col++;
|
|
|
|
if (pos >= lineend) {
|
|
|
|
clpos->end = lineend;
|
2001-02-19 23:50:17 +03:00
|
|
|
i++;
|
2000-10-07 20:39:34 +04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (; i <= ncols; i++)
|
|
|
|
clist[i].start = clist[i].end = lineend;
|
2009-08-15 22:40:01 +04:00
|
|
|
if (clist[0].start < line_data)
|
2001-02-19 23:50:17 +03:00
|
|
|
clist[0].start++;
|
2001-02-19 22:41:31 +03:00
|
|
|
|
2009-08-16 01:26:32 +04:00
|
|
|
/*
|
|
|
|
* We write the sort keys (concatenated) followed by the
|
|
|
|
* original line data (for output) as the 'keybuf' data.
|
|
|
|
* keybuf->length is the number of key bytes + data bytes.
|
|
|
|
* keybuf->offset is the number of key bytes.
|
|
|
|
* We add a record separator (usually \n) after the key in case
|
|
|
|
* (as is usual) we need to preserve the order of equal lines,
|
|
|
|
* and for 'sort -u'.
|
|
|
|
*/
|
2001-02-19 22:41:31 +03:00
|
|
|
keypos = keybuf->data;
|
2009-08-16 01:26:32 +04:00
|
|
|
endkey = keybuf_end - line_size - 1;
|
|
|
|
if (endkey <= keypos)
|
|
|
|
/* No room for any key bytes */
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) {
|
2000-10-07 20:39:34 +04:00
|
|
|
if ((keypos = enterfield(keypos, endkey, ftpos,
|
|
|
|
fieldtable->flags)) == NULL)
|
|
|
|
return (1);
|
2009-08-16 01:26:32 +04:00
|
|
|
}
|
|
|
|
*keypos++ = REC_D;
|
2000-10-07 20:39:34 +04:00
|
|
|
|
|
|
|
keybuf->offset = keypos - keybuf->data;
|
2009-08-15 22:40:01 +04:00
|
|
|
keybuf->length = keybuf->offset + line_size;
|
2001-01-12 22:39:02 +03:00
|
|
|
|
2009-08-16 01:26:32 +04:00
|
|
|
memcpy(keypos, line_data, line_size);
|
2000-10-07 20:39:34 +04:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* constructs a field (as defined by -k) within a key
|
|
|
|
*/
|
2001-01-12 22:39:02 +03:00
|
|
|
static u_char *
|
2009-08-20 10:36:25 +04:00
|
|
|
enterfield(u_char *tablepos, const u_char *endkey, struct field *cur_fld,
|
|
|
|
int gflags)
|
2000-10-07 20:39:34 +04:00
|
|
|
{
|
2000-10-16 00:46:33 +04:00
|
|
|
u_char *start, *end, *lineend, *mask, *lweight;
|
2000-10-07 20:39:34 +04:00
|
|
|
struct column icol, tcol;
|
2000-10-16 00:46:33 +04:00
|
|
|
u_int flags;
|
2000-10-07 20:39:34 +04:00
|
|
|
u_int Rflag;
|
2001-02-19 23:50:17 +03:00
|
|
|
|
2000-10-07 20:39:34 +04:00
|
|
|
icol = cur_fld->icol;
|
|
|
|
tcol = cur_fld->tcol;
|
|
|
|
flags = cur_fld->flags;
|
|
|
|
start = icol.p->start;
|
|
|
|
lineend = clist[ncols].end;
|
|
|
|
if (flags & BI)
|
2003-04-09 13:30:40 +04:00
|
|
|
SKIP_BLANKS(start);
|
2000-10-07 20:39:34 +04:00
|
|
|
start += icol.indent;
|
|
|
|
start = min(start, lineend);
|
2001-02-19 23:50:17 +03:00
|
|
|
|
2000-10-07 20:39:34 +04:00
|
|
|
if (!tcol.num)
|
|
|
|
end = lineend;
|
|
|
|
else {
|
|
|
|
if (tcol.indent) {
|
|
|
|
end = tcol.p->start;
|
2001-01-12 22:39:02 +03:00
|
|
|
if (flags & BT)
|
2003-04-09 13:30:40 +04:00
|
|
|
SKIP_BLANKS(end);
|
2000-10-07 20:39:34 +04:00
|
|
|
end += tcol.indent;
|
|
|
|
end = min(end, lineend);
|
|
|
|
} else
|
|
|
|
end = tcol.p->end;
|
|
|
|
}
|
2001-01-12 22:39:02 +03:00
|
|
|
|
2000-10-07 20:39:34 +04:00
|
|
|
if (flags & N) {
|
|
|
|
Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
|
2001-01-12 22:39:02 +03:00
|
|
|
return number(tablepos, endkey, start, end, Rflag);
|
2000-10-07 20:39:34 +04:00
|
|
|
}
|
2001-01-12 22:39:02 +03:00
|
|
|
|
2000-10-07 20:39:34 +04:00
|
|
|
mask = cur_fld->mask;
|
|
|
|
lweight = cur_fld->weights;
|
|
|
|
for (; start < end; start++)
|
|
|
|
if (mask[*start]) {
|
|
|
|
if (*start <= 1) {
|
|
|
|
if (tablepos+2 >= endkey)
|
|
|
|
return (NULL);
|
|
|
|
*tablepos++ = lweight[1];
|
|
|
|
*tablepos++ = lweight[*start ? 2 : 1];
|
|
|
|
} else {
|
2001-02-19 22:52:27 +03:00
|
|
|
if (tablepos+1 >= endkey)
|
2001-01-12 22:39:02 +03:00
|
|
|
return (NULL);
|
2001-02-19 22:52:27 +03:00
|
|
|
*tablepos++ = lweight[*start];
|
2000-10-07 20:39:34 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
*tablepos++ = lweight[0];
|
|
|
|
return (tablepos == endkey ? NULL : tablepos);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Uses the first bin to assign sign, expsign, 0, and the first
|
|
|
|
* 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
|
|
|
|
* When sorting in forward order:
|
|
|
|
* use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
|
|
|
|
* else use (0-99)->(2-102).
|
|
|
|
* If the exponent is >=61, use another byte for each additional 253
|
|
|
|
* in the exponent. Cutoff is at 567.
|
|
|
|
* To avoid confusing the exponent and the mantissa, use a field delimiter
|
|
|
|
* if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
|
|
|
|
* only time a field delimiter can come in that position.
|
|
|
|
* Reverse order is done analagously.
|
2001-02-19 23:50:17 +03:00
|
|
|
*/
|
2000-10-07 20:39:34 +04:00
|
|
|
|
2001-01-12 22:39:02 +03:00
|
|
|
static u_char *
|
2009-08-15 22:40:01 +04:00
|
|
|
number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend, int Rflag)
|
2000-10-07 20:39:34 +04:00
|
|
|
{
|
2000-10-16 00:46:33 +04:00
|
|
|
int or_sign, parity = 0;
|
|
|
|
int expincr = 1, exponent = -1;
|
2004-02-15 15:41:25 +03:00
|
|
|
int bite, expsign = 1, sign = 1, zeroskip = 0;
|
2004-02-15 14:54:17 +03:00
|
|
|
u_char lastvalue='0', *nonzero=NULL, *tline, *C_TENS;
|
2000-10-07 20:39:34 +04:00
|
|
|
u_char *nweights;
|
|
|
|
|
|
|
|
if (Rflag)
|
|
|
|
nweights = rnum;
|
|
|
|
else
|
|
|
|
nweights = fnum;
|
|
|
|
if (pos > bufend - 8)
|
|
|
|
return (NULL);
|
2001-02-19 23:50:17 +03:00
|
|
|
/*
|
|
|
|
* or_sign sets the sort direction:
|
|
|
|
* (-r: +/-)(sign: +/-)(expsign: +/-)
|
|
|
|
*/
|
2000-10-07 20:39:34 +04:00
|
|
|
or_sign = sign ^ expsign ^ Rflag;
|
2003-04-09 13:30:40 +04:00
|
|
|
SKIP_BLANKS(line);
|
2000-10-07 20:39:34 +04:00
|
|
|
if (*line == '-') { /* set the sign */
|
|
|
|
or_sign ^= 1;
|
|
|
|
sign = 0;
|
|
|
|
line++;
|
|
|
|
}
|
|
|
|
/* eat initial zeroes */
|
2001-02-19 23:50:17 +03:00
|
|
|
for (; *line == '0' && line < lineend; line++)
|
2004-02-15 15:41:25 +03:00
|
|
|
zeroskip = 1;
|
2000-10-07 20:39:34 +04:00
|
|
|
/* calculate exponents < 0 */
|
|
|
|
if (*line == DECIMAL) {
|
|
|
|
exponent = 1;
|
|
|
|
while (*++line == '0' && line < lineend)
|
|
|
|
exponent++;
|
|
|
|
expincr = 0;
|
|
|
|
expsign = 0;
|
|
|
|
}
|
|
|
|
/* next character better be a digit */
|
|
|
|
if (*line < '1' || *line > '9' || line >= lineend) {
|
2004-02-15 15:41:25 +03:00
|
|
|
/* only exit if we didn't skip any zero number */
|
|
|
|
if (!zeroskip) {
|
|
|
|
*pos++ = nweights[127];
|
|
|
|
return (pos);
|
|
|
|
}
|
2000-10-07 20:39:34 +04:00
|
|
|
}
|
|
|
|
if (expincr) {
|
|
|
|
for (tline = line-1; *++tline >= '0' &&
|
|
|
|
*tline <= '9' && tline < lineend;)
|
|
|
|
exponent++;
|
|
|
|
}
|
|
|
|
if (exponent > 567) {
|
|
|
|
*pos++ = nweights[sign ? (expsign ? 254 : 128)
|
|
|
|
: (expsign ? 0 : 126)];
|
|
|
|
warnx("exponent out of bounds");
|
|
|
|
return (pos);
|
|
|
|
}
|
|
|
|
bite = min(exponent, 61);
|
|
|
|
*pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
|
|
|
|
: (expsign ? 64-bite : 64+bite)];
|
|
|
|
if (bite >= 61) {
|
|
|
|
do {
|
|
|
|
exponent -= bite;
|
|
|
|
bite = min(exponent, 254);
|
|
|
|
*pos++ = nweights[or_sign ? 254-bite : bite];
|
|
|
|
} while (bite == 254);
|
|
|
|
}
|
|
|
|
C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
|
|
|
|
for (; line < lineend; line++) {
|
|
|
|
if (*line >= '0' && *line <= '9') {
|
|
|
|
if (parity) {
|
|
|
|
*pos++ = C_TENS[lastvalue] + (or_sign ? - *line
|
|
|
|
: *line);
|
|
|
|
if (pos == bufend)
|
|
|
|
return (NULL);
|
|
|
|
if (*line != '0' || lastvalue != '0')
|
|
|
|
nonzero = pos;
|
|
|
|
} else
|
|
|
|
lastvalue = *line;
|
|
|
|
parity ^= 1;
|
2003-10-18 07:03:20 +04:00
|
|
|
} else if (*line == DECIMAL) {
|
|
|
|
if (!expincr) /* a decimal already occurred once */
|
2000-10-07 20:39:34 +04:00
|
|
|
break;
|
|
|
|
expincr = 0;
|
|
|
|
} else
|
|
|
|
break;
|
|
|
|
}
|
2004-02-15 15:41:25 +03:00
|
|
|
if ((parity && lastvalue != '0') || !nonzero) {
|
2000-10-07 20:39:34 +04:00
|
|
|
*pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
|
|
|
|
OFF_TENS[lastvalue] + '0';
|
|
|
|
} else
|
|
|
|
pos = nonzero;
|
|
|
|
if (pos > bufend-1)
|
|
|
|
return (NULL);
|
|
|
|
*pos++ = or_sign ? nweights[254] : nweights[0];
|
|
|
|
return (pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This forces a gap around the record delimiter
|
2009-08-16 01:26:32 +04:00
|
|
|
* Thus fnum has values over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
|
2000-10-07 20:39:34 +04:00
|
|
|
* rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
|
2001-02-19 23:50:17 +03:00
|
|
|
*/
|
2000-10-07 20:39:34 +04:00
|
|
|
void
|
2009-08-15 13:48:46 +04:00
|
|
|
num_init(void)
|
2000-10-07 20:39:34 +04:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
|
|
|
|
NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
|
|
|
|
OFF_TENS = TENS - '0';
|
|
|
|
OFF_NTENS = NEGTENS - '0';
|
|
|
|
for (i = 1; i < 10; i++) {
|
2001-02-19 23:50:17 +03:00
|
|
|
TENS[i] = TENS[i - 1] + 10;
|
|
|
|
NEGTENS[i] = NEGTENS[i - 1] - 10;
|
2000-10-07 20:39:34 +04:00
|
|
|
}
|
|
|
|
for (i = 0; i < REC_D; i++) {
|
|
|
|
fnum[i] = i;
|
2001-02-19 23:50:17 +03:00
|
|
|
rnum[255 - i] = i;
|
2000-10-07 20:39:34 +04:00
|
|
|
}
|
|
|
|
for (i = REC_D; i <255; i++) {
|
2001-02-19 23:50:17 +03:00
|
|
|
fnum[i] = i + 1;
|
|
|
|
rnum[255 - i] = i - 1;
|
2000-10-07 20:39:34 +04:00
|
|
|
}
|
|
|
|
}
|