a868903568
setting \n as the record delimiter using a numeric value rather than literal \n - and to not incorrectly turn \n into a field separator if -R is used to make some other char the record separator (\n becomes a field separator in that case as long as the field separator remains "white space" but should not be in any other case - unless set explicitly of course.) Plus more cosmetic changes - the man page and usage are updated to make it more clear that the 2 (or 1) params to -k are not fields (field1 and field2) but specifiers of the beginning and end of one key field. There was an unused 'x' option in the GETOPTS string. The usage message is reformatted to display properly on both 80 col and > 80 col displays (on < 80 it will still probably look pretty ugly ... perhaps not quite so bad though), and is also updated to show the different usage for the -c case (and -C) from the others (only 1 file permitted) - the man page synopsis has a similar update. Using more than one of -c -C or -m generates a usage message rather than just ignoring the -m as it did before (there was no -C before of course). Aside from the bug fix to the interaction between -R and -t, there are no changes that affect the way anything is sorted (or read, or written). Discussed on tech-userlevel earlier this week.
202 lines
7.6 KiB
C
202 lines
7.6 KiB
C
/* $NetBSD: sort.h,v 1.36 2016/06/01 02:37:55 kre Exp $ */
|
|
|
|
/*-
|
|
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to The NetBSD Foundation
|
|
* by Ben Harris and Jaromir Dolecek.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
/*-
|
|
* Copyright (c) 1993
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to Berkeley by
|
|
* Peter McIlroy.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* @(#)sort.h 8.1 (Berkeley) 6/6/93
|
|
*/
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <err.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <limits.h>
|
|
#include <stddef.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/* Number of entries in each per-character table declared in this header (ascii, Rascii, Ftable, RFtable, d_mask). */
#define NBINS 256
|
|
|
|
/*
 * Bit values used for masks, weights and other flags.
 * R and F are also used to index weight_tables[].
 */
#define R	(1 << 0)	/* Field is reversed */
#define F	(1 << 1)	/* weight lower and upper case the same */
#define I	(1 << 2)	/* mask out non-printable characters */
#define D	(1 << 3)	/* sort alphanumeric characters only */
#define N	(1 << 4)	/* Field is a number */
#define BI	(1 << 5)	/* ignore blanks in icol */
#define BT	(1 << 6)	/* ignore blanks in tcol */
#define L	(1 << 7)	/* Sort by field length */
|
|
|
|
/* Bit masks classifying delimiter characters: blanks, fields, and termination. */
#define BLANK	0x01	/* ' ', '\t'; '\n' if -R is invoked */
#define FLD_D	0x02	/* ' ', '\t' default; from -t otherwise */
#define REC_D_F	0x04	/* '\n' default; from -R otherwise */
|
|
|
|
/*
 * Smaller and larger of two values.
 * These are plain macros: an argument may be evaluated more than once, so
 * do not pass expressions that have side effects.
 */
#define min(a, b)	((b) > (a) ? (a) : (b))
#define max(a, b)	((b) < (a) ? (a) : (b))
|
|
|
|
/*
 * FCLOSE(file): close the stdio stream `file'; if fclose() returns EOF,
 * terminate through err() with exit status 2.
 *
 * The stream pointer is converted to void * before it is printed, since
 * that is the only argument type the %p conversion is defined for.
 *
 * This expands to a braced block, not an expression, and `file' is
 * evaluated a second time on the failure path.
 */
#define FCLOSE(file) {							\
	if (EOF == fclose(file))					\
		err(2, "%p", (void *)(file));				\
}
|
|
|
|
/*
 * EWRITE(ptr, size, n, f, fmt): write n items of `size' bytes each from
 * ptr to the stream f using fwrite(); if fewer than n items get written,
 * terminate through err() with exit status 2.
 *
 * fmt is handed to err() as its format string with no further arguments,
 * so it must not contain conversion specifications.
 *
 * The item count is captured once in a local, so n is evaluated a single
 * time.  Comparing against that count (rather than against zero) means a
 * partial write is reported as a failure and a request to write zero
 * items is not.
 *
 * This expands to a braced block, not an expression.
 */
#define EWRITE(ptr, size, n, f, fmt) {					\
	const size_t ewrite_nitems_ = (n);				\
	if (fwrite((ptr), (size), ewrite_nitems_, (f)) !=		\
	    ewrite_nitems_)						\
		err(2, fmt);						\
}
|
|
|
|
/*
 * Type used for record lengths and offsets.
 *
 * Records are limited to MAXBUFSIZE (8MB), and to less than that if you
 * want to sort in a sane way.  Anyone who wants to sort data records
 * longer than 2GB definitely needs a different program!
 */
typedef unsigned length_t;
|
|
|
|
/* A record is a key/line pair starting at rec.data. It has a total length
|
|
* and an offset to the start of the line half of the pair.
|
|
*/
|
|
typedef struct recheader {
|
|
length_t length; /* total length of key and line */
|
|
length_t offset; /* to line */
|
|
int keylen; /* length of key */
|
|
u_char data[]; /* key then line */
|
|
} RECHEADER;
|
|
|
|
/*
 * A column as seen by struct field; used by enterfield.
 * Each one is matched with its corresponding coldesc during initialization.
 */
struct column {
	struct coldesc *p;	/* the coldesc matched to this column */
	/*
	 * NOTE(review): presumably the column number and an offset within
	 * that column -- confirm against the code that fills these in.
	 */
	int num, indent;
};
|
|
|
|
/* a coldesc has a number and pointers to the beginning and end of the
|
|
* corresponding column in the current line. This is determined in enterkey.
|
|
*/
|
|
typedef struct coldesc {
|
|
u_char *start;
|
|
u_char *end;
|
|
int num;
|
|
} COLDESC;
|
|
|
|
/* A field has an initial and final column; an omitted final column
|
|
* implies the end of the line. Flags regulate omission of blanks and
|
|
* numerical sorts; mask determines which characters are ignored (from -i, -d);
|
|
* weights determines the sort weights of a character (from -f, -r).
|
|
*
|
|
* The first field contain the global flags etc.
|
|
* The list terminates when icol = 0.
|
|
*/
|
|
struct field {
|
|
struct column icol;
|
|
struct column tcol;
|
|
u_int flags;
|
|
u_char *mask;
|
|
u_char *weights;
|
|
};
|
|
|
|
/*
 * Wrapper around an array of strings; neither the array elements nor the
 * strings can be modified through it.
 * NOTE(review): presumably the names of the input files -- confirm
 * against the callers of fsort()/fmerge().
 */
struct filelist {
	const char *const *names;
};
|
|
|
|
/*
 * Function-pointer types.
 *
 * get_func_t names the signature shared by geteasy(), makekey() and
 * makeline(); save_for_merge() takes one as an argument.
 * put_func_t names the signature shared by putline(), putrec() and
 * putkeydump(); merge_sort() takes one as an argument.
 */
typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *,
    struct field *);
typedef void (*put_func_t)(const RECHEADER *, FILE *);
|
|
|
|
/* Per-character tables, NBINS entries each. */
extern u_char ascii[NBINS];
extern u_char Rascii[NBINS];
extern u_char Ftable[NBINS];
extern u_char RFtable[NBINS];
extern u_char d_mask[NBINS];

/* ascii, Rascii, Ftable, RFtable */
extern u_char *const weight_tables[4];

extern int SINGL_FLD;
extern int SEP_FLAG;
extern int UNIQUE;
extern int REVERSE;
extern int posix_sort;
extern int REC_D;

extern const char *tmpdir;

extern struct coldesc *clist;
extern int ncols;
|
|
|
|
/*
 * DEBUG(ch): nonzero when the bit of debug_flags selected by the
 * character ch is set.  Only the low five bits of ch choose the bit, so
 * characters that agree in those bits share a flag.
 *
 * The shifted constant is unsigned because (ch & 31) can be 31, and
 * shifting a signed 1 into the sign bit is undefined behaviour.
 */
#define DEBUG(ch)	(debug_flags & (1u << ((ch) & 31)))
extern unsigned int debug_flags;
|
|
|
|
RECHEADER *allocrec(RECHEADER *, size_t);
|
|
void append(RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *));
|
|
void concat(FILE *, FILE *);
|
|
length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *);
|
|
void fixit(int *, char **, const char *);
|
|
void fldreset(struct field *);
|
|
FILE *ftmp(void);
|
|
void fmerge(struct filelist *, int, FILE *, struct field *);
|
|
void save_for_merge(FILE *, get_func_t, struct field *);
|
|
void merge_sort(FILE *, put_func_t, struct field *);
|
|
void fsort(struct filelist *, int, FILE *, struct field *);
|
|
int geteasy(FILE *, RECHEADER *, u_char *, struct field *);
|
|
int makekey(FILE *, RECHEADER *, u_char *, struct field *);
|
|
int makeline(FILE *, RECHEADER *, u_char *, struct field *);
|
|
void makeline_copydown(RECHEADER *);
|
|
int optval(int, int);
|
|
__dead void order(struct filelist *, struct field *, int);
|
|
void putline(const RECHEADER *, FILE *);
|
|
void putrec(const RECHEADER *, FILE *);
|
|
void putkeydump(const RECHEADER *, FILE *);
|
|
void rd_append(int, int, int, FILE *, u_char *, u_char *);
|
|
void radix_sort(RECHEADER **, RECHEADER **, int);
|
|
int setfield(const char *, struct field *, int);
|
|
void settables(void);
|