/* $NetBSD: fields.c,v 1.24 2009/08/20 06:36:25 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Ben Harris and Jaromir Dolecek. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Peter McIlroy. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* Subroutines to generate sort keys. */ #include "sort.h" #ifndef lint __RCSID("$NetBSD: fields.c,v 1.24 2009/08/20 06:36:25 dsl Exp $"); __SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93"); #endif /* not lint */ #define SKIP_BLANKS(ptr) { \ if (BLANK & d_mask[*(ptr)]) \ while (BLANK & d_mask[*(++(ptr))]); \ } #define NEXTCOL(pos) { \ if (!SEP_FLAG) \ while (BLANK & l_d_mask[*(++pos)]); \ while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\ } static u_char *enterfield(u_char *, const u_char *, struct field *, int); static u_char *number(u_char *, const u_char *, u_char *, u_char *, int); #define DECIMAL '.' #define OFFSET 128 u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */ u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */ u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */ u_char fnum[NBINS], rnum[NBINS]; /* * constructs sort key with leading recheader, followed by the key, * followed by the original line. */ length_t enterkey(RECHEADER *keybuf, const u_char *keybuf_end, u_char *line_data, size_t line_size, struct field fieldtable[]) /* keybuf: pointer to start of key */ { int i; u_char *l_d_mask; u_char *lineend, *pos; const u_char *endkey; u_char *keypos; struct coldesc *clpos; int col = 1; struct field *ftpos; l_d_mask = d_mask; pos = line_data - 1; lineend = line_data + line_size-1; /* don't include rec_delimiter */ for (i = 0; i < ncols; i++) { clpos = clist + i; for (; (col < clpos->num) && (pos < lineend); col++) { NEXTCOL(pos); } if (pos >= lineend) break; clpos->start = SEP_FLAG ? pos + 1 : pos; NEXTCOL(pos); clpos->end = pos; col++; if (pos >= lineend) { clpos->end = lineend; i++; break; } } for (; i <= ncols; i++) clist[i].start = clist[i].end = lineend; if (clist[0].start < line_data) clist[0].start++; /* * We write the sort keys (concatenated) followed by the * original line data (for output) as the 'keybuf' data. * keybuf->length is the number of key bytes + data bytes. * keybuf->offset is the number of key bytes. * We add a record separator (usually \n) after the key in case * (as is usual) we need to preserve the order of equal lines, * and for 'sort -u'. */ keypos = keybuf->data; endkey = keybuf_end - line_size - 1; if (endkey <= keypos) /* No room for any key bytes */ return 1; for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) { if ((keypos = enterfield(keypos, endkey, ftpos, fieldtable->flags)) == NULL) return (1); } *keypos++ = REC_D; keybuf->offset = keypos - keybuf->data; keybuf->length = keybuf->offset + line_size; memcpy(keypos, line_data, line_size); return (0); } /* * constructs a field (as defined by -k) within a key */ static u_char * enterfield(u_char *tablepos, const u_char *endkey, struct field *cur_fld, int gflags) { u_char *start, *end, *lineend, *mask, *lweight; struct column icol, tcol; u_int flags; u_int Rflag; icol = cur_fld->icol; tcol = cur_fld->tcol; flags = cur_fld->flags; start = icol.p->start; lineend = clist[ncols].end; if (flags & BI) SKIP_BLANKS(start); start += icol.indent; start = min(start, lineend); if (!tcol.num) end = lineend; else { if (tcol.indent) { end = tcol.p->start; if (flags & BT) SKIP_BLANKS(end); end += tcol.indent; end = min(end, lineend); } else end = tcol.p->end; } if (flags & N) { Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0; return number(tablepos, endkey, start, end, Rflag); } mask = cur_fld->mask; lweight = cur_fld->weights; for (; start < end; start++) if (mask[*start]) { if (*start <= 1) { if (tablepos+2 >= endkey) return (NULL); *tablepos++ = lweight[1]; *tablepos++ = lweight[*start ? 2 : 1]; } else { if (tablepos+1 >= endkey) return (NULL); *tablepos++ = lweight[*start]; } } *tablepos++ = lweight[0]; return (tablepos == endkey ? NULL : tablepos); } /* Uses the first bin to assign sign, expsign, 0, and the first * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ). * When sorting in forward order: * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128; * else use (0-99)->(2-102). * If the exponent is >=61, use another byte for each additional 253 * in the exponent. Cutoff is at 567. * To avoid confusing the exponent and the mantissa, use a field delimiter * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the * only time a field delimiter can come in that position. * Reverse order is done analagously. */ static u_char * number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend, int Rflag) { int or_sign, parity = 0; int expincr = 1, exponent = -1; int bite, expsign = 1, sign = 1, zeroskip = 0; u_char lastvalue='0', *nonzero=NULL, *tline, *C_TENS; u_char *nweights; if (Rflag) nweights = rnum; else nweights = fnum; if (pos > bufend - 8) return (NULL); /* * or_sign sets the sort direction: * (-r: +/-)(sign: +/-)(expsign: +/-) */ or_sign = sign ^ expsign ^ Rflag; SKIP_BLANKS(line); if (*line == '-') { /* set the sign */ or_sign ^= 1; sign = 0; line++; } /* eat initial zeroes */ for (; *line == '0' && line < lineend; line++) zeroskip = 1; /* calculate exponents < 0 */ if (*line == DECIMAL) { exponent = 1; while (*++line == '0' && line < lineend) exponent++; expincr = 0; expsign = 0; } /* next character better be a digit */ if (*line < '1' || *line > '9' || line >= lineend) { /* only exit if we didn't skip any zero number */ if (!zeroskip) { *pos++ = nweights[127]; return (pos); } } if (expincr) { for (tline = line-1; *++tline >= '0' && *tline <= '9' && tline < lineend;) exponent++; } if (exponent > 567) { *pos++ = nweights[sign ? (expsign ? 254 : 128) : (expsign ? 0 : 126)]; warnx("exponent out of bounds"); return (pos); } bite = min(exponent, 61); *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite) : (expsign ? 64-bite : 64+bite)]; if (bite >= 61) { do { exponent -= bite; bite = min(exponent, 254); *pos++ = nweights[or_sign ? 254-bite : bite]; } while (bite == 254); } C_TENS = or_sign ? OFF_NTENS : OFF_TENS; for (; line < lineend; line++) { if (*line >= '0' && *line <= '9') { if (parity) { *pos++ = C_TENS[lastvalue] + (or_sign ? - *line : *line); if (pos == bufend) return (NULL); if (*line != '0' || lastvalue != '0') nonzero = pos; } else lastvalue = *line; parity ^= 1; } else if (*line == DECIMAL) { if (!expincr) /* a decimal already occurred once */ break; expincr = 0; } else break; } if ((parity && lastvalue != '0') || !nonzero) { *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' : OFF_TENS[lastvalue] + '0'; } else pos = nonzero; if (pos > bufend-1) return (NULL); *pos++ = or_sign ? nweights[254] : nweights[0]; return (pos); } /* This forces a gap around the record delimiter * Thus fnum has values over (0,254) -> ((0,REC_D-1),(REC_D+1,255)); * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0)) */ void num_init(void) { int i; TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0'; NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0'; OFF_TENS = TENS - '0'; OFF_NTENS = NEGTENS - '0'; for (i = 1; i < 10; i++) { TENS[i] = TENS[i - 1] + 10; NEGTENS[i] = NEGTENS[i - 1] - 10; } for (i = 0; i < REC_D; i++) { fnum[i] = i; rnum[255 - i] = i; } for (i = REC_D; i <255; i++) { fnum[i] = i + 1; rnum[255 - i] = i - 1; } }