4.4BSD-Lite2 contrib/sort

This commit is contained in:
bjh21 2000-10-07 16:39:34 +00:00
parent 5faa7161ce
commit 1d5d9b5b60
15 changed files with 3699 additions and 0 deletions

6
usr.bin/sort/Makefile Normal file
View File

@ -0,0 +1,6 @@
# @(#)Makefile 8.1 (Berkeley) 6/6/93
PROG= sort
SRCS= append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c
.include <bsd.prog.mk>

893
usr.bin/sort/TEST/stests Normal file
View File

@ -0,0 +1,893 @@
# @(#)stests 8.1 (Berkeley) 6/6/93
#Latest version. My sort passes all tests because I wrote it.
#We differ only on 25E and 25H.
#(I found at least one bug in constructing test 25, and was driven
#to rewrite field parsing to clarify it.)
#
#In 25E, -k2.3,2.1b, the fields are not necessarily out of order.
#Even if they were, it would be legal (11752-3), although certainly
#justification for warning.
#
#On 25H, your answer is as defensible as mine. (Our suggestion
#*1 backs mine.)
# Tests for the Unix sort utility
# Test Posix features except for locale.
# Test some nonstandard features if present.
# Other tests should be made for files too big to fit in memory.
# Initialize switches for nonstandard features.
# Use parenthesized settings for supported features.
o=: # officially obsolescent features: +1 -2, misplaced -o (o=)
g=: # -g numeric sort including e-format numbers (g=)
M=: # -M sort by month names (M=)
s=: # -s stable, do not compare raw bytes on equal keys (s=)
y= # -y user-specified memory size (y=-y10000)
# Detect what features are supported, assuming bad options cause
# errors. Set switches accordingly.
echo obsolescent and nonstandard features recognized, if any:
if sort +0 </dev/null 2>/dev/null; then o=
echo ' +1 -2'; fi
if sort /dev/null -o xx 2>/dev/null; then o=
echo ' displaced -o'; fi
if sort -g </dev/null 2>/dev/null; then g=
echo ' -g g-format numbers'; fi
if sort -M </dev/null 2>/dev/null; then M=
echo ' -M months'; fi
if sort -s </dev/null 2>/dev/null; then s=
echo ' -s stable'; fi
if sort -y10000 </dev/null 2>/dev/null; then y=-y10000
echo ' -y space'; fi
if sort -z10000 </dev/null 2>/dev/null; then
echo ' -z size (not exercised)'; fi
if sort -T. </dev/null 2>/dev/null; then
echo ' -T tempdir (not exercised)'; fi
export TEST # major sequence number of test
trap "rm -f in in1 out xx -k xsort linecount fields; exit" 0 1 2 13 15
# xsort testno options
# Sort file "in" with specified options.
# Compare with file "out" if that is supplied,
# otherwise make plausibility checks on output
# "sum" must be dumb; insensitive to the
# order of lines within a file.
# System V sum is suitable; sum -5 is the v10 equivalent.
PATH=.:$PATH
export PATH
cat <<'!' >xsort; chmod +x xsort
X=$1; shift
if sort "$@" in >xx && sort -c "$@" xx
then
if test -f out
then
cmp xx out >/dev/null && exit 0
echo $TEST$X comparison failed
else
test "`cksum -o2 <in`" = "`cksum -o2 <xx`" && exit 0
echo $TEST$X checksum failed
fi
else
echo $TEST$X failed
fi
exit 1
!
# linecount testno file count
# declares the given "testno" to be in error if number of
# lines in "file" differs from "count"
cat <<'!' >linecount; chmod +x linecount
awk 'END{ if(NR!='$3') print "'$TEST$1' failed" }' $2
!
rm -f out
#---------------------------------------------------------------
TEST=01; echo $TEST # -c status, checksum
# obsolescent features go together
cat <<! >in
b
a
!
rm -f out -o
# A: "in" is unsorted, so sort -c must exit non-zero.
sort -c in 2>/dev/null && echo ${TEST}A failed
# B: no "out" file exists, so xsort falls back to its checksum comparison.
xsort B || echo '"cksum"' is probably unsuitable - see comments
# C: old-style +pos key with -o after the operand; a no-op when $o is ":".
$o sort +0 in -o in || echo ${TEST}C failed
#---------------------------------------------------------------
TEST=02; echo $TEST # output from -c
cat <<! >in
x
y
!
sort -cr in >out 2>xx && echo ${TEST}A failed
test -s out && echo ${TEST}B failed
test -s xx && echo option -c is noisy "(probably legal)"
test -s xx || echo option -c is quiet "(legal, not classical)"
#---------------------------------------------------------------
TEST=03; echo $TEST # -n
cat <<! >in
-99.0
-99.1
-.0002
-10
2
0010.000000000000000000000000000000000001
10
3x
x
!
cat <<! >out
-99.1
-99.0
-10
-.0002
x
2
3x
10
0010.000000000000000000000000000000000001
!
xsort "" -n
#---------------------------------------------------------------
TEST=04; echo $TEST # -b without fields, piping, -c status return
cat <<! >in
b
a
!
cp in out
xsort A -b
cat in | sort | cat >xx
cmp xx out >/dev/null || echo ${TEST}B failed
sort in | sort -cr 2>/dev/null && echo ${TEST}C failed
#---------------------------------------------------------------
TEST=05; echo $TEST # fields, reverse fields, -c status return
cat <<! >in
b b p
a b q
x a
!
cat <<! >out
x a
a b q
b b p
!
# A-F: every variant is compared by xsort against the same "out" file.
# The old-style +pos/-pos forms become no-ops when $o is ":".
$o xsort A +1 -2
$o xsort B +1 -2 +2r
xsort C -k 2,2
xsort D -k 2,2 -k 3r
xsort E -k 2,2.0
xsort F -k 2,2 -k 1,1 -k 3
# G: "in" is not ordered on -k 2, so sort -c must exit non-zero.
sort -c -k 2 in 2>/dev/null && echo ${TEST}G failed
#---------------------------------------------------------------
TEST=06; echo $TEST # -t
cat <<! >in
a:
a!
!
cp in out
$o xsort A -t : -r +0
$o xsort B -t : +0 -1
xsort C -t : -r -k 1
xsort D -t : -k 1,1
#---------------------------------------------------------------
TEST=07; echo $TEST # -t, character positions in fields
# -t: as 1 arg is not strictly conforming, but classical
cat <<! >in
: ab
:bac
!
cat <<! >out
:bac
: ab
!
$o xsort A -b -t: +1.1
$o xsort B -t: +1.1r
xsort C -b -t: -k 2.2
xsort D -t: -k 2.2r
#---------------------------------------------------------------
TEST=08; echo $TEST # space and tab as -t characters
cat <<! >in
b c
b c
b c
!
cp in out
xsort A -t ' ' -k2,2
xsort B -t ' ' -k2.1,2.0
cat <<! >out
b c
b c
b c
!
xsort C -t ' ' -k2,2
xsort D -t ' ' -k2.1,2.0
cat <<! >out
b c
b c
b c
!
xsort E -k2
cat <<! >out
b c
b c
b c
!
xsort F -k2b
#---------------------------------------------------------------
TEST=09; echo $TEST # alphabetic as -t character
cat <<! >in
zXa
yXa
zXb
!
cp in out
xsort "" -tX -k2 -k1r,1
#---------------------------------------------------------------
TEST=10; echo $TEST # -m
cat <<! >in
a
ab
ab
bc
ca
!
cat <<! >in1
Z
a
aa
ac
c
!
cat <<! >out
Z
a
a
aa
ab
ab
ac
bc
c
ca
!
sort -m in in1 >xx
cmp xx out >/dev/null || echo $TEST failed
#---------------------------------------------------------------
TEST=11; echo $TEST # multiple files, -o overwrites input, -m, -mu
cat <<! >in
a
b
c
d
!
# A: sorting many copies of the 4-line file must yield 68 lines in xx.
sort -o xx in in in in in in in in in in in in in in in in in
linecount A xx 68
# B: -mu with -o naming an input file; duplicates collapse back to 4 lines.
sort -o in -mu in in in in in in in in in in in in in in in in in
linecount B in 4
# C: -m with -o naming an input file; the result must equal xx from step A.
sort -o in -m in in in in in in in in in in in in in in in in in
cmp in xx >/dev/null || echo ${TEST}C failed
#---------------------------------------------------------------
TEST=12; echo $TEST # does -mu pick the first among equals?
cat <<! >in
3B
3b
3B2
~3B2
4.1
41
5
5.
!
cat <<! >out
3B
3B2
4.1
5
!
xsort A -mudf || echo "(other behavior is legal, not classical)"
xsort B -mudf -k1 || echo "(other behavior is legal, not classical)"
#---------------------------------------------------------------
TEST=13; echo $TEST # long records (>8000 bytes, keys >16000), -r
awk '
BEGIN { x="x"
for(i=1; i<=12; i++) x = x x
for(i=15; i<=25; i++) print x i
}' >in
awk '
BEGIN { x="x"
for(i=1; i<=12; i++) x = x x
for(i=25; i>=15; i--) print x i
}' >out
xsort A -r
xsort B -k 1,1r -k 1
#---------------------------------------------------------------
TEST=14; echo $TEST "(3 long parts)"
awk 'BEGIN { for(i=0; i<100000; i++) print rand() }' | grep -v e >in
rm -f out
xsort A; echo $TEST "(part A done)"
xsort B -n; echo $TEST "(part B done)"
# next test is unclean: xx is a hidden side-effect of xsort
awk '
$0 < x { print "test '${TEST}C' failed"; exit }
$0 "" != x { print >"out"; x = $0 }
' xx
xsort C -n -u
#---------------------------------------------------------------
TEST=15; echo $TEST "(long)" # force intermediate files if possible
awk 'BEGIN { for(i=0; i<20000; i++) print rand() }' >in
rm -f out
xsort A -r $y
sort -r in | awk '$0 "x" != x { print ; x = $0 "x" }' >out
xsort B -u -r $y
#---------------------------------------------------------------
TEST=16; echo $TEST # -nr, -nm, file name -
awk 'BEGIN { for(i=-100; i<=100; i+=2) printf "%.10d\n", i }' >in
awk 'BEGIN { for(i=-99; i<=100; i+=2) print i }' | sort -nr in - >xx
awk '$0+0 != 101-NR { print "'${TEST}A' failed"; exit }' xx
awk 'BEGIN { for(i=-99; i<=100; i+=2) print i }' | sort -mn in - >xx
awk '$0+0 != -101+NR { print "'${TEST}B' failed"; exit }' xx
#---------------------------------------------------------------
TEST=17; echo $TEST # -d, fields without end, modifier override
cat <<! >in
a-B
a+b
a b
A+b
a b
!
cat <<! >out
a b
a b
A+b
a-B
a+b
!
$o xsort A -df +0 +0d
xsort B -df -k 1 -k 1d
#---------------------------------------------------------------
TEST=18; echo $TEST # -u on key only
cat <<! >in
12 y
13 z
12 x
!
cat <<! >out
12 x
12 y
13 z
!
$o xsort A +0 -1
xsort B -k 1,1
sort -u -k 1,1 in >xx
linecount C xx 2
#---------------------------------------------------------------
TEST=19; echo $TEST # -i, -d, -f
cat <<! >xx.c
run(i,j){ for( ; i<=j; i++) printf("%.3o %c\n",i,i); }
main(){ run(0, 011); /* 012=='\n' */
run(013, 0377); }
!
cc xx.c
a.out >in
cat <<! >xx.c
run(i,j){ for( ; i<=j; i++) printf("%.3o %c\n",i,i); }
main(){ run(0, 011);
run(013, ' '-1);
run(0177, 0377);
run(' ', 0176); }
!
cc xx.c
a.out >out
xsort A -i -k 2
cat <<! >xx.c
run(i,j){ for( ; i<=j; i++) printf("%.3o %c\n",i,i); }
main(){ run(0, 010); /* 011=='\t', 012=='\n' */
run(013, ' '-1);
run(' '+1, '0'-1);
run('9'+1, 'A'-1);
run('Z'+1, 'a'-1);
run('z'+1, 0377);
run('\t', '\t');
run(' ', ' ');
run('0', '9');
run('A', 'Z');
run('a', 'z'); }
!
cc xx.c
a.out >out
xsort B -d -k 2
cat <<! >xx.c
run(i,j){ for( ; i<=j; i++) printf("%.3o %c\n",i,i); }
main(){ int i;
run(0, 011);
run(013, 'A'-1);
for(i='A'; i<='Z'; i++)
printf("%.3o %c\n%.3o %c\n",i,i,i+040,i+040);
run('Z'+1, 'a'-1);
run('z'+1, 0377); }
!
cc xx.c
a.out >out
rm xx.c
xsort C -f -k 2
#---------------------------------------------------------------
TEST=20; echo $TEST # -d, -f, -b applies only to fields
cat <<! >in
b
'C
a
!
cp in out
xsort A -d
xsort B -f
cat <<! >out
b
a
'C
!
xsort C -dfb
#---------------------------------------------------------------
TEST=21; echo $TEST # behavior of null bytes
cat <<'!' >xx.c
main() { printf("%cb\n%ca\n",0,0); }
!
cc xx.c
a.out >in
sort in >xx
cmp in xx >/dev/null && echo ${TEST}A failed
test "`wc -c <in`" = "`wc -c <xx`" || echo ${TEST}B failed
rm xx.c a.out
#---------------------------------------------------------------
TEST=22; echo $TEST # field limits
cat <<! >in
a 2
a 1
b 2
b 1
!
cat <<! >out
b 1
b 2
a 1
a 2
!
xsort "" -r -k1,1 -k2n
#---------------------------------------------------------------
TEST=23; echo $TEST # empty file
sort -o xx </dev/null
cmp xx /dev/null 2>/dev/null || echo ${TEST}A failed
sort -c </dev/null || echo ${TEST}B failed
sort -cu </dev/null || echo ${TEST}C failed
#---------------------------------------------------------------
TEST=24; echo $TEST # many fields
cat <<! >in
0:2:3:4:5:6:7:8:9
1:1:3:4:5:6:7:8:9
1:2:2:4:5:6:7:8:9
1:2:3:3:5:6:7:8:9
1:2:3:4:4:6:7:8:9
1:2:3:4:5:5:7:8:9
1:2:3:4:5:6:6:8:9
1:2:3:4:5:6:7:7:9
1:2:3:4:5:6:7:8:8
!
cat <<! >out
1:2:3:4:5:6:7:8:8
1:2:3:4:5:6:7:7:9
1:2:3:4:5:6:6:8:9
1:2:3:4:5:5:7:8:9
1:2:3:4:4:6:7:8:9
1:2:3:3:5:6:7:8:9
1:2:2:4:5:6:7:8:9
1:1:3:4:5:6:7:8:9
0:2:3:4:5:6:7:8:9
!
xsort "" -t: -k9 -k8 -k7 -k6 -k5 -k4 -k3 -k2 -k1
#---------------------------------------------------------------
TEST=25; echo $TEST # variously specified alpha fields
# numbers give the correct orderings
cat <<! >in
01:04:19:01:16:01:21:01 a
02:03:13:15:13:19:15:02 a
03:02:07:09:07:13:09:03 a
04:01:01:03:01:07:03:04 a
05:08:20:16:17:02:20:05 aa
06:07:14:18:14:20:14:06 aa
07:06:08:10:08:14:08:07 aa
08:05:02:04:02:08:02:08 aa
09:16:22:02:22:04:24:13 b
10:15:16:20:19:22:18:14 b
11:14:10:12:10:16:12:15 b
12:13:04:06:04:10:06:16 b
13:24:24:22:24:06:22:21 bb
14:23:18:24:21:24:16:22 bb
15:22:12:14:12:18:10:23 bb
16:21:06:08:06:12:04:24 bb
17:12:21:21:18:03:19:09 ab
18:11:15:19:15:21:13:10 ab
19:10:09:11:09:15:07:11 ab
20:09:03:05:03:09:01:12 ab
21:20:23:17:23:05:23:17 ba
22:19:17:23:20:23:17:18 ba
23:18:11:13:11:17:11:19 ba
24:17:05:07:05:11:05:20 ba
!
sort -k2b -k2 in >xx &&
sort -c -t: -k2n xx 2>/dev/null || echo ${TEST}A failed
sort -k2,2.1b -k2 in >xx &&
sort -c -t: -k3n xx 2>/dev/null || echo ${TEST}B failed
sort -k2.3 -k2 in >xx &&
sort -c -t: -k4n xx 2>/dev/null || echo ${TEST}C failed
sort -k2b,2.3 -k2 in >xx &&
sort -c -t: -k5n xx 2>/dev/null || echo ${TEST}D failed
sort -k2.3,2.1b -k2 in >xx &&
sort -c -t: -k6n xx 2>/dev/null || echo ${TEST}E failed
sort -k2,2.1b -k2r in >xx &&
sort -c -t: -k7n xx 2>/dev/null || echo ${TEST}F failed
sort -b -k2,2 -k2 in >xx &&
sort -c -t: -k8n xx 2>/dev/null || echo ${TEST}G failed
sort -b -k2,2b -k2 in >xx && # perhaps same as G
sort -c -t: -k3n xx 2>/dev/null || echo ${TEST}H failed\
"(standard is not clear on this)"
#---------------------------------------------------------------
TEST=26; echo $TEST # empty fields, out of bounds fields
cat <<! >in
0 5
1 4
2 3
3 2
4 1
5 0
!
cp in out
xsort "" -k2.2,2.1 -k2.3,2.4
#---------------------------------------------------------------
TEST=27; echo $TEST # displaced -o
rm -f out
$o sort /dev/null -o out || $o echo ${TEST}B failed
$o test -f out || $o echo ${TEST}C failed
#---------------------------------------------------------------
TEST=28; echo $TEST # apparently nonmonotone field specs
cat <<! >in
aaaa c
x a
0 b
!
cp in out
$o xsort A +1 -0.3 +1.4 -1.5
xsort B -k2,1.3 -k2.5,2.5
#---------------------------------------------------------------
TEST=29; echo $TEST # determination of end of option list
cat >-k <<!
x
!
rm -f out -c
sort -- -k </dev/null >xx || echo ${TEST}A argument failed
cmp xx -k || echo ${TEST}A comparison failed
sort - -c </dev/null 2>/dev/null && echo ${TEST}B failed
#---------------------------------------------------------------
TEST=30; echo $TEST # missing newline
awk 'BEGIN{ printf "%s", "x"}' | sort >xx
wc -c <xx | awk '$1!=2{ print "'${TEST}' failed" }'
#---------------------------------------------------------------
TEST=31; echo $TEST # -M, multiple fields
cat <<! >in
jan 10 1900
Feb 26 1900
feb 25 1900
January xx 1900
August 11 1900
jan 15 1990
feb 22 1990
mar 15 1990
apr 1 1990
may 45 1990
jun 14 1990
jul 4 1990
aug 1~ 1990
aug 11 1990
sep 1 1990
oct 12 1990
nov 24 1990
dec 25 1990
never 3 1990
Dec 25 1990
!
cat <<! >out
January xx 1900
jan 10 1900
feb 25 1900
Feb 26 1900
August 11 1900
never 3 1990
jan 15 1990
feb 22 1990
mar 15 1990
apr 1 1990
may 45 1990
jun 14 1990
jul 4 1990
aug 1~ 1990
aug 11 1990
sep 1 1990
oct 12 1990
nov 24 1990
Dec 25 1990
dec 25 1990
!
$M xsort "" -k3n -k1M -k2n
#---------------------------------------------------------------
TEST=32; echo $TEST # -M case insensitivity, -r
cat <<! >in
x
june
january
december
!
cat <<! >out
december
june
january
x
!
$M xsort "" -Mr
#---------------------------------------------------------------
TEST=33; echo $TEST # -g
cat <<! >in
2
1
10
.2
1e
1E1
1e.
!
cat <<! >out
.2
1
1e
1e.
2
10
1E1
!
$g xsort "" -g
#---------------------------------------------------------------
TEST=34; echo $TEST # -g wide operands
cat <<! >in
.99999999999999999999
099999999999999999999e-21
099999999999999999999e-19
.1e1
!
cat <<! >out
099999999999999999999e-21
.99999999999999999999
.1e1
099999999999999999999e-19
!
$g xsort A -g
cat <<! >out
.1e1
.99999999999999999999
099999999999999999999e-19
099999999999999999999e-21
!
xsort B -n
#---------------------------------------------------------------
TEST=35; echo $TEST #-g, -u with different fp reps
cat <<! >in
+0
-0
0.10
+.1
-.1
-100e-3
x
!
cat <<! >out
-.1
-100e-3
+0
-0
x
+.1
0.10
!
$g xsort A -g
$g sort -gu in >xx && $g sort -c -gu xx || echo ${TEST}B failed
$g linecount C xx 3
#---------------------------------------------------------------
TEST=36; echo $TEST # -s
cat <<! >in
a 2
b 1
c 2
a 1
b 2
c 1
!
cat <<! >out
a 2
a 1
b 1
b 2
c 2
c 1
!
$s xsort "" -s -k1,1
#---------------------------------------------------------------
TEST=37; echo $TEST # -s, multiple files
cat <<! >in
a 2
c 2
!
cat <<! >in1
a 1
b 1
c 1
!
cat <<! >out
c 2
b 1
a 2
!
$s sort -smru -k1,1 in in in1 in1 >xx
$s cmp xx out >/dev/null || echo $TEST failed
#---------------------------------------------------------------
TEST=38; echo $TEST # -s
$s awk '
BEGIN {
for(i=1; i<50; i++)
for(j=1; j<=i; j++) {
print i, 2 >"in"
print i, 1 >"in1"
}
}'
$s sort -m -s -k1,1n in in1 >out
$s awk '
func stop() { print "'$TEST' failed"; exit }
$1!=last1 { if(count!=last1 || $2!=2) stop();
count = 0}
$1==last1 && $2!=last2 { if(count!=last1 || $2!=1) stop();
count = 0 }
{ count++; last1 = $1; last2 = $2 }
' out

188
usr.bin/sort/append.c Normal file
View File

@ -0,0 +1,188 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
static char sccsid[] = "@(#)append.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
#include "sort.h"
#include <stdlib.h>
#include <string.h>
/*
 * OUTPUT: write out the pending run of keys [ppos, cpos).
 * Expands inside append() and uses its locals n, ppos, cpos, prec, odepth,
 * wts1, put and fd.  When the run holds more than one key, every key
 * pointer is moved back by odepth, the run is re-sorted by radixsort()
 * with the wts1 table, and each record is then written with put(); ppos
 * ends up equal to cpos.  A run of one key is written directly from prec.
 * The expansion is a bare brace block, which is why callers write
 * `{ OUTPUT; }` when an else follows.
 */
#define OUTPUT { \
if ((n = cpos - ppos) > 1) { \
for (; ppos < cpos; ++ppos) \
*ppos -= odepth; \
ppos -= n; \
radixsort(ppos, n, wts1, REC_D); \
for (; ppos < cpos; ppos++) { \
prec = (RECHEADER *) (*ppos - sizeof(TRECHEADER));\
put(prec, fd); \
} \
} else put(prec, fd); \
}
/*
 * copy sorted lines to output; check for uniqueness
 *
 * keylist	array of nelem key pointers, already in sorted order; the
 *		record header of each entry lies depth + sizeof(TRECHEADER)
 *		bytes before the address stored in keylist
 * nelem	number of entries in keylist
 * depth	distance of each key pointer from the start of its key data
 * fd		output stream handed to put()
 * put		routine that writes one record
 * ftbl		field table; ftbl[0] supplies the weights and flags
 *
 * Three cases are handled:
 *  - single field, and either UNIQUE or a tie-break table (wts1) that
 *    differs from the sort table: neighbouring records are compared
 *    backwards from their ends under wts.  With UNIQUE only the first
 *    record of each equal run is written; otherwise each equal run is
 *    re-sorted and written by OUTPUT.
 *  - UNIQUE with several fields: as above, but only the key part
 *    (rec->offset bytes) is compared.
 *  - otherwise every record is written in keylist order.
 */
void
append(keylist, nelem, depth, fd, put, ftbl)
	u_char **keylist;
	int nelem;
	register int depth;
	FILE *fd;
	void (*put)(RECHEADER *, FILE *);
	struct field *ftbl;
{
	register u_char *wts, *wts1;
	register int n, odepth = 0;
	register u_char **cpos, **ppos, **lastkey;
	register u_char *cend, *pend, *start;
	register struct recheader *crec, *prec;

	if (*keylist == NULL && UNIQUE)
		return;
	wts1 = wts = ftbl[0].weights;
	if ((!UNIQUE) && SINGL_FLD) {
		/* pick the table used by OUTPUT to order equal-key runs */
		if (ftbl[0].flags & F && ftbl[0].flags & R)
			wts1 = Rascii;
		else if (ftbl[0].flags & F)
			wts1 = ascii;
		/* OUTPUT needs the caller's depth, before the header is added */
		odepth = depth;
	}
	lastkey = keylist + nelem;
	depth += sizeof(TRECHEADER);
	if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
		ppos = keylist;
		prec = (RECHEADER *) (*ppos - depth);
		if (UNIQUE)
			put(prec, fd);
		for (cpos = keylist+1; cpos < lastkey; cpos++) {
			crec = (RECHEADER *) (*cpos - depth);
			if (crec->length == prec->length) {
				pend = (u_char *) &prec->offset + prec->length;
				cend = (u_char *) &crec->offset + crec->length;
				/* compare backwards down to the key pointer */
				for (start = *cpos; cend >= start; cend--) {
					if (wts[*cend] != wts[*pend])
						break;
					pend--;
				}
				/* pend stopped early: the records differ */
				if (pend + 1 != *ppos) {
					if (!UNIQUE) {
						OUTPUT;
					} else
						put(crec, fd);
					ppos = cpos;
					prec = crec;
				}
			} else {
				if (!UNIQUE) {
					OUTPUT;
				} else
					put(crec, fd);
				ppos = cpos;
				prec = crec;
			}
		}
		/* flush the final run */
		if (!UNIQUE)  { OUTPUT; }
	} else if (UNIQUE) {
		ppos = keylist;
		prec = (RECHEADER *) (*ppos - depth);
		put(prec, fd);
		for (cpos = keylist+1; cpos < lastkey; cpos++) {
			crec = (RECHEADER *) (*cpos - depth);
			if (crec->offset == prec->offset) {
				pend = (u_char *) &prec->offset + prec->offset;
				cend = (u_char *) &crec->offset + crec->offset;
				for (start = *cpos; cend >= start; cend--) {
					if (wts[*cend] != wts[*pend])
						break;
					pend--;
				}
				if (pend + 1 != *ppos) {
					ppos = cpos;
					prec = crec;
					put(prec, fd);
				}
			} else {
				ppos = cpos;
				prec = crec;
				put(prec, fd);
			}
		}
	} else for (cpos = keylist; cpos < lastkey; cpos++) {
		crec = (RECHEADER *) (*cpos - depth);
		put(crec, fd);
	}
}
/*
 * Write out the end-of-line bin, which is already in sorted order.
 * Records are fetched one at a time with getnext() into buffer and
 * written with putline(); when UNIQUE is set only the first record
 * is written.
 */
void
rd_append(binno, infl0, nfiles, outfd, buffer, bufend)
	u_char *buffer, *bufend;
	int binno, nfiles;
	union f_handle infl0;
	FILE *outfd;
{
	struct recheader *rec;

	rec = (RECHEADER *) buffer;

	/* nothing to do unless a first record can be read */
	if (getnext(binno, infl0, nfiles, (RECHEADER *) buffer, bufend, 0) != 0)
		return;
	putline(rec, outfd);

	/* the remaining records are read either way, but skipped if UNIQUE */
	for (;;) {
		if (getnext(binno, infl0, nfiles, (RECHEADER *) buffer,
		    bufend, 0) != 0)
			break;
		if (UNIQUE)
			continue;
		putline(rec, outfd);
	}
}
/*
 * append plain text--used after sorting the biggest bin.
 *
 * Stream b is rewound and then copied in buffer-sized chunks to stream a
 * through EWRITE.  The copy stops at the first fread() that returns no
 * data.
 */
void
concat(a, b)
	FILE *a, *b;
{
	size_t nread;		/* fread() returns size_t, not int */
	char buffer[4096];

	rewind(b);
	while ((nread = fread(buffer, 1, sizeof(buffer), b)) > 0)
		EWRITE(buffer, 1, nread, a);
}

67
usr.bin/sort/extern.h Normal file
View File

@ -0,0 +1,67 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)extern.h 8.1 (Berkeley) 6/6/93
*/
/*
 * Declarations of the functions shared between the sort source files.
 * Every argument list is wrapped in __P().  Function-pointer parameters
 * are declared with empty parentheses, so the arguments of those
 * callbacks are not type-checked through these declarations.
 */
void append __P((u_char **, int, int, FILE *, void (*)(), struct field *));
void concat __P((FILE *, FILE *));
length_t enterkey __P((struct recheader *,
DBT *, int, struct field *));
void fixit __P((int *, char **));
void fldreset __P((struct field *));
FILE *ftmp __P((void));
void fmerge __P((int, union f_handle,
int, int (*)(), FILE *, void (*)(), struct field *));
void fsort __P((int, int, union f_handle, int, FILE *, struct field *));
/* geteasy, getnext, makekey and makeline share one argument list */
int geteasy __P((int, union f_handle,
int, struct recheader *, u_char *, struct field *));
int getnext __P((int, union f_handle,
int, struct recheader *, u_char *, struct field *));
int makekey __P((int, union f_handle,
int, struct recheader *, u_char *, struct field *));
int makeline __P((int, union f_handle,
int, struct recheader *, u_char *, struct field *));
void merge __P((int, int, int (*)(), FILE *, void (*)(), struct field *));
void num_init __P((void));
void onepass __P((u_char **, int, long, long *, u_char *, FILE *));
int optval __P((int, int));
void order __P((union f_handle, int (*)(), struct field *));
/* putline and putrec share one argument list: a record and a stream */
void putline __P((struct recheader *, FILE *));
void putrec __P((struct recheader *, FILE *));
void rd_append __P((int, union f_handle, int, FILE *, u_char *, u_char *));
int seq __P((FILE *, DBT *, DBT *));
int setfield __P((char *, struct field *, int));
void settables __P((int));

319
usr.bin/sort/fields.c Normal file
View File

@ -0,0 +1,319 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
static char sccsid[] = "@(#)fields.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
/* Subroutines to generate sort keys. */
#include "sort.h"
/*
 * blancmange(ptr): if the byte at ptr is flagged BLANK in d_mask, advance
 * ptr to the first following byte that is not.  No end bound is tested.
 * ptr is evaluated more than once and is modified in place.
 */
#define blancmange(ptr) { \
if (BLANK & d_mask[*(ptr)]) \
while (BLANK & d_mask[*(++(ptr))]); \
}
/*
 * NEXTCOL(pos): advance pos to the next byte flagged as a field or record
 * delimiter (FLD_D | REC_D_F).  When SEP_FLAG is clear, pos is first
 * stepped forward past bytes flagged BLANK.  pos is always incremented
 * before each test, so the byte it starts on is never examined.
 * Requires a variable named l_d_mask in the expanding scope.
 */
#define NEXTCOL(pos) { \
if (!SEP_FLAG) \
while (BLANK & l_d_mask[*(++pos)]); \
while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \
}
extern u_char *enterfield __P((u_char *, u_char *, struct field *, int));
extern u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
/* column descriptors filled in by enterkey(); ncols of them are in use */
extern struct coldesc clist[(ND+1)*2];
extern int ncols;
#define DECIMAL '.'
#define OFFSET 128
u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
/* weight tables used by number(): fnum for forward order, rnum for reverse */
u_char fnum[NBINS], rnum[NBINS];
/*
 * constructs sort key with leading recheader, followed by the key,
 * followed by the original line.
 *
 * keybuf	record to fill in: data, offset and length are written
 * line		the input line (data pointer and size)
 * size		compared against the record length plus sizeof(TRECHEADER)
 *		to decide whether the record fits
 * fieldtable	fieldtable[0].flags is passed to enterfield() as the global
 *		flags; entries from fieldtable[1] on are the key fields,
 *		ending at the first entry whose icol.num is 0
 *
 * Returns 0 on success, 1 when the key or the line does not fit.
 */
length_t
enterkey(keybuf, line, size, fieldtable)
struct recheader *keybuf; /* pointer to start of key */
DBT *line;
int size;
struct field fieldtable[];
{
int i;
register u_char *l_d_mask;
register u_char *lineend, *pos;
u_char *endkey, *keypos;
register struct coldesc *clpos;
register int col = 1;
struct field *ftpos;
/* NEXTCOL reads the delimiter table through this local name */
l_d_mask = d_mask;
/* start one byte early: NEXTCOL increments pos before it looks */
pos = (u_char *) line->data - 1;
lineend = (u_char *) line->data + line->size-1;
/* don't include rec_delimiter */
keypos = keybuf->data;
/* locate the start and end of every column listed in clist */
for (i = 0; i < ncols; i++) {
clpos = clist + i;
/* skip ahead to column number clpos->num */
for (; (col < clpos->num) && (pos < lineend); col++)
{ NEXTCOL(pos); }
if (pos >= lineend)
break;
clpos->start = SEP_FLAG ? pos + 1 : pos;
NEXTCOL(pos);
clpos->end = pos;
col++;
if (pos >= lineend) {
clpos->end = lineend;
++i;
break;
}
}
/* columns that were not reached are empty, at the end of the line */
for (; i <= ncols; i++)
clist[i].start = clist[i].end = lineend;
/* undo the one-byte-early start if the first column inherited it */
if (clist[0].start < (u_char *) line->data)
++clist[0].start;
/* limit for key bytes: line->size bytes are held back from size */
endkey = (u_char *) keybuf + size - line->size;
for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
if ((keypos = enterfield(keypos, endkey, ftpos,
fieldtable->flags)) == NULL)
return (1);
/* with UNIQUE the last key byte is overwritten with REC_D */
if (UNIQUE)
*(keypos-1) = REC_D;
/* offset is the key length; length also counts the copied line */
keybuf->offset = keypos - keybuf->data;
keybuf->length = keybuf->offset + line->size;
if (keybuf->length + sizeof(TRECHEADER) > size)
return (1); /* line too long for buffer */
memcpy(keybuf->data + keybuf->offset, line->data, line->size);
return (0);
}
/*
 * constructs a field (as defined by -k) within a key
 *
 * tablepos	where in the key to start writing
 * endkey	limit of the space available for key bytes
 * cur_fld	the field: start column (icol), end column (tcol), flags,
 *		mask and weights
 * gflags	global flags; only the R bit is consulted, and only for
 *		numeric fields
 *
 * Returns the position after the last byte written, or NULL when the
 * key space is exhausted.  Fields with the N flag are encoded by number().
 */
u_char *
enterfield(tablepos, endkey, cur_fld, gflags)
	struct field *cur_fld;
	register u_char *tablepos, *endkey;
	int gflags;
{
	register u_char *start, *end, *lineend, *mask, *lweight;
	struct column icol, tcol;
	register u_int flags;
	u_int Rflag;

	icol = cur_fld->icol;
	tcol = cur_fld->tcol;
	flags = cur_fld->flags;

	/* locate the first byte of the field, clamped to the line end */
	start = icol.p->start;
	lineend = clist[ncols].end;
	if (flags & BI)
		blancmange(start);
	start += icol.indent;
	start = min(start, lineend);

	/* locate the end of the field; no end column means end of line */
	if (!tcol.num)
		end = lineend;
	else {
		if (tcol.indent) {
			end = tcol.p->start;
			if (flags & BT) blancmange(end);
			end += tcol.indent;
			end = min(end, lineend);
		} else
			end = tcol.p->end;
	}

	if (flags & N) {
		/* reverse only when exactly one of the two R bits is set */
		Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
		tablepos = number(tablepos, endkey, start, end, Rflag);
		return (tablepos);
	}

	mask = cur_fld->mask;
	lweight = cur_fld->weights;
	for (; start < end; start++)
		if (mask[*start]) {
			if (*start <= 1) {
				/*
				 * bytes 0 and 1 are written as two key bytes:
				 * lweight[1] followed by lweight[1] or lweight[2]
				 */
				if (tablepos+2 >= endkey)
					return (NULL);
				*tablepos++ = lweight[1];
				*tablepos++ = lweight[*start ? 2 : 1];
			} else {
				*tablepos++ = lweight[*start];
				if (tablepos == endkey)
					return (NULL);
			}
		}
	/* lweight[0] marks the end of the field */
	*tablepos++ = lweight[0];
	return (tablepos == endkey ? NULL : tablepos);
}
/* Uses the first bin to assign sign, expsign, 0, and the first
 * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
 * When sorting in forward order:
 * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
 * else use (0-99)->(2-102).
 * If the exponent is >=61, use another byte for each additional 253
 * in the exponent. Cutoff is at 567.
 * To avoid confusing the exponent and the mantissa, use a field delimiter
 * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
 * only time a field delimiter can come in that position.
 * Reverse order is done analogously.
 *
 * pos		where to write the encoded key bytes
 * bufend	end of the key buffer
 * line		first byte of the numeric field
 * lineend	one past the last byte of the field
 * Rflag	non-zero selects the rnum weights instead of fnum
 *
 * Returns the position after the last byte written, or NULL when the
 * buffer is too small.  The field bound is tested before each byte is
 * examined, so nothing at or beyond lineend influences the scan.
 */
u_char *
number(pos, bufend, line, lineend, Rflag)
	register u_char *line, *pos, *bufend, *lineend;
	int Rflag;
{
	register int or_sign, parity = 0;
	register int expincr = 1, exponent = -1;
	int bite, expsign = 1, sign = 1;
	register u_char lastvalue = 0, *nonzero = NULL, *tline, *C_TENS;
	u_char *nweights;

	if (Rflag)
		nweights = rnum;
	else
		nweights = fnum;
	if (pos > bufend - 8)
		return (NULL);
	/* or_sign sets the sort direction:
	 *	(-r: +/-)(sign: +/-)(expsign: +/-) */
	or_sign = sign ^ expsign ^ Rflag;
	blancmange(line);
	if (line < lineend && *line == '-') {	/* set the sign */
		or_sign ^= 1;
		sign = 0;
		line++;
	}
	/* eat initial zeroes */
	for (; line < lineend && *line == '0'; line++)
		;
	/* calculate exponents < 0 */
	if (line < lineend && *line == DECIMAL) {
		exponent = 1;
		while (++line < lineend && *line == '0')
			exponent++;
		expincr = 0;
		expsign = 0;
	}
	/* next character better be a digit */
	if (line >= lineend || *line < '1' || *line > '9') {
		*pos++ = nweights[127];
		return (pos);
	}
	if (expincr) {
		/* count the digits before any decimal point */
		for (tline = line; tline < lineend &&
		    *tline >= '0' && *tline <= '9'; tline++)
			exponent++;
	}
	if (exponent > 567) {
		*pos++ = nweights[sign ? (expsign ? 254 : 128)
					: (expsign ? 0 : 126)];
		warnx("exponent out of bounds");
		return (pos);
	}
	bite = min(exponent, 61);
	*pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
				: (expsign ? 64-bite : 64+bite)];
	if (bite >= 61) {
		do {
			exponent -= bite;
			bite = min(exponent, 254);
			*pos++ = nweights[or_sign ? 254-bite : bite];
		} while (bite == 254);
	}
	C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
	/*
	 * Encode the mantissa two digits per key byte.  The first digit is
	 * known to be '1'..'9' (checked above), so the first completed pair
	 * always sets nonzero before it can be read below.
	 */
	for (; line < lineend; line++) {
		if (*line >= '0' && *line <= '9') {
			if (parity) {
				*pos++ = C_TENS[lastvalue] + (or_sign ? - *line
						: *line);
				if (pos == bufend)
					return (NULL);
				if (*line != '0' || lastvalue != '0')
					nonzero = pos;
			} else
				lastvalue = *line;
			parity ^= 1;
		} else if(*line == DECIMAL) {
			if(!expincr)	/* a decimal already occurred once */
				break;
			expincr = 0;
		} else
			break;
	}
	if (parity && lastvalue != '0') {
		/* an unpaired final digit gets a byte of its own */
		*pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
					OFF_TENS[lastvalue] + '0';
	} else
		pos = nonzero;	/* drop trailing all-zero pairs */
	if (pos > bufend-1)
		return (NULL);
	*pos++ = or_sign ? nweights[254] : nweights[0];
	return (pos);
}
/*
 * Build the lookup tables used by number().
 *
 * TENS/NEGTENS hold the base weight for each leading digit of a
 * two-digit pair (ascending and descending by 10); OFF_TENS/OFF_NTENS
 * are the same tables biased so they can be indexed by an ASCII digit.
 *
 * fnum and rnum force a gap around the record delimiter.
 * Thus fnum has values over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
 * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
 */
void
num_init()
{
	int digit, v;

	if (REC_D <= 128) {
		TENS[0] = 130 - '0';
		NEGTENS[0] = 126 + '0';
	} else {
		TENS[0] = 2 - '0';
		NEGTENS[0] = 254 + '0';
	}
	OFF_TENS = TENS - '0';
	OFF_NTENS = NEGTENS - '0';
	digit = 1;
	while (digit < 10) {
		TENS[digit] = TENS[digit-1] + 10;
		NEGTENS[digit] = NEGTENS[digit-1] - 10;
		digit++;
	}

	/* values below the record delimiter map to themselves ... */
	v = 0;
	while (v < REC_D) {
		fnum[v] = v;
		rnum[255-v] = v;
		v++;
	}
	/* ... and the rest are shifted by one to step over it */
	v = REC_D;
	while (v < 255) {
		fnum[v] = v+1;
		rnum[255-v] = v-1;
		v++;
	}
}

338
usr.bin/sort/files.c Normal file
View File

@ -0,0 +1,338 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
static char sccsid[] = "@(#)files.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
#include "sort.h"
#include "fsort.h"
#include <string.h>
/*
 * This is the record reader fsort() uses on its own temporary files.
 * It keeps, in static variables, the read state for the bin currently
 * being drained from the files fstack[infl0.top .. infl0.top+nfiles-1].
 *
 * binno  - bin to read; a negative value rewinds all nfiles files and
 *          resets the state (only honoured when no bin is half-read)
 * infl0  - handle whose .top is the index of the first file in fstack
 * nfiles - number of temporary files
 * pos    - where to store the record (header followed by data)
 * end    - end of the destination buffer
 * dummy  - unused
 *
 * Returns 0 when a record was stored, BUFFEND when it does not fit
 * (the header is pushed back so the read can be retried), EOF when the
 * bin is exhausted in every file, and -1 after a reset.
 */
int
getnext(binno, infl0, nfiles, pos, end, dummy)
	int binno, nfiles;
	union f_handle infl0;
	register struct recheader *pos;
	register u_char *end;
	struct field *dummy;
{
	register int i;
	register u_char *hp;
	static long nleft = 0;		/* bytes left in the current bin segment */
	static int cnt = 0, flag = -1;	/* cnt: next file to open a segment in */
	static u_char maxb = 0;
	static FILE *fd;

	if (nleft == 0) {
		if (binno < 0) {	/* reset files. */
			for (i = 0; i < nfiles; i++) {
				rewind(fstack[infl0.top + i].fd);
				fstack[infl0.top + i].max_o = 0;
			}
			flag = -1;
			nleft = cnt = 0;
			return (-1);
		}
		maxb = fstack[infl0.top].maxb;
		for (; nleft == 0; cnt++) {
			if (cnt >= nfiles) {
				cnt = 0;
				return (EOF);
			}
			fd = fstack[infl0.top + cnt].fd;
			/*
			 * Read the segment size.  onepass() writes it as one
			 * long, so read exactly sizeof(nleft) bytes; reading
			 * sizeof(TRECHEADER) bytes is only correct where the
			 * two sizes happen to coincide.
			 */
			hp = (u_char *) &nleft;
			for (i = sizeof(nleft); i; --i)
				*hp++ = getc(fd);
			if (binno < maxb)
				/* remember where bin maxb will start */
				fstack[infl0.top+cnt].max_o
				    += sizeof(nleft) + nleft;
			else if (binno == maxb) {
				if (binno != fstack[infl0.top].lastb) {
					/* maxb is read out of order: seek to it */
					fseek(fd, fstack[infl0.top+
					    cnt].max_o, SEEK_SET);
					fread(&nleft, sizeof(nleft), 1, fd);
				}
				if (nleft == 0)
					fclose(fd);
			} else if (binno == maxb + 1) {	/* skip a bin */
				fseek(fd, nleft, SEEK_CUR);
				fread(&nleft, sizeof(nleft), 1, fd);
				flag = cnt;
			}
		}
	}
	if ((u_char *) pos > end - sizeof(TRECHEADER))
		return (BUFFEND);
	hp = (u_char *) pos;
	for (i = sizeof(TRECHEADER); i ; --i)
		*hp++ = (u_char) getc(fd);
	if (end - pos->data < pos->length) {
		/* no room for the data: push the header back, last byte first */
		for (i = sizeof(TRECHEADER); i ; i--)
			ungetc(*--hp, fd);
		return (BUFFEND);
	}
	fread(pos->data, pos->length, 1, fd);
	nleft -= pos->length + sizeof(TRECHEADER);
	if (nleft == 0 && binno == fstack[infl0.top].maxb)
		fclose(fd);
	return (0);
}
/*
 * Read one raw line into buffer->data.  This is called when there is no
 * special key.  It's only called in the first fsort pass.
 *
 * flno     - index into fstack of an already open input, or negative to
 *            read the files named in filelist one after another
 * filelist - input file names (used when flno < 0)
 * nfiles   - number of names in filelist
 * buffer   - record to fill; offset is set to 0, length to the line
 *            length including the record delimiter
 * bufend   - end of the destination buffer
 * dummy2   - unused
 *
 * Returns 0 when a line was stored, BUFFEND when the buffer filled up
 * in the middle of a line (the partial line is kept and moved to the
 * start of the buffer on the next call), and EOF when all input is
 * exhausted.  Lines longer than MAXLLEN are reported and discarded.
 */
int
makeline(flno, filelist, nfiles, buffer, bufend, dummy2)
	int flno, nfiles;
	union f_handle filelist;
	struct recheader *buffer;
	u_char *bufend;
	struct field *dummy2;
{
	static char *opos;
	register char *end, *pos;
	static int fileno = 0, overflow = 0;
	static FILE *fd = 0;
	register int c;

	c = 0;		/* never examine an indeterminate value below */
	pos = (char *) buffer->data;
	end = min((char *) bufend, pos + MAXLLEN);
	if (overflow) {
		/* carry over the partial line left by the previous call */
		memmove(pos, opos, bufend - (u_char *) opos);
		pos += ((char *) bufend - opos);
		overflow = 0;
	}
	for (;;) {
		if (flno >= 0) {
			if (!(fd = fstack[flno].fd))
				return (EOF);
		} else if (!fd) {
			if (fileno >= nfiles)
				return (EOF);
			if (!(fd = fopen(filelist.names[fileno], "r")))
				err(2, "%s", filelist.names[fileno]);
			++fileno;
		}
		while ((pos < end) && ((c = getc(fd)) != EOF)) {
			if ((*pos++ = c) == REC_D) {
				buffer->offset = 0;
				buffer->length = pos - (char *) buffer->data;
				return (0);
			}
		}
		if (pos >= end && end == (char *) bufend) {
			if ((char *) buffer->data < end) {
				overflow = 1;
				opos = (char *) buffer->data;
			}
			return (BUFFEND);
		} else if (c == EOF) {
			if (buffer->data != (u_char *) pos) {
				warnx("last character not record delimiter");
				*pos++ = REC_D;
				buffer->offset = 0;
				buffer->length = pos - (char *) buffer->data;
				return (0);
			}
			FCLOSE(fd);
			fd = 0;
			if (flno >= 0)
				fstack[flno].fd = 0;
		} else {
			buffer->data[100] = '\000';
			warnx("line too long:ignoring %s...", buffer->data);
			/*
			 * Actually ignore the line: drop the rest of it and
			 * start over at the beginning of the record.  Without
			 * this the loop made no progress and repeated the
			 * warning forever.
			 */
			while ((c = getc(fd)) != EOF && c != REC_D)
				continue;
			pos = (char *) buffer->data;
		}
	}
}
/*
 * Read one line and build its sort key in buffer via enterkey().
 * It's only called in the first fsort pass.
 *
 * flno < 0 reads the files named in filelist in turn; otherwise the
 * stream fstack[flno].fd is used.  Returns 0 when a record was built,
 * BUFFEND when it did not fit before bufend (the line is kept and
 * entered on the next call), and EOF when the input is exhausted.
 */
int
makekey(flno, filelist, nfiles, buffer, bufend, ftbl)
	int flno, nfiles;
	union f_handle filelist;
	struct recheader *buffer;
	u_char *bufend;
	struct field *ftbl;
{
	static int (*reader)();
	static int nextfile = 0;
	static FILE *in = 0;
	static DBT dbkey[1], line[1];
	static int pending = 0;
	int status;

	/* A line that did not fit last time goes into the fresh buffer. */
	if (pending) {
		pending = 0;
		enterkey(buffer, line, bufend - (u_char *) buffer, ftbl);
		return (0);
	}
	for (;;) {
		if (flno >= 0) {
			reader = seq;
			if (!(in = fstack[flno].fd))
				return(EOF);
		} else if (!in) {
			if (nextfile >= nfiles)
				return (EOF);
			in = fopen(filelist.names[nextfile], "r");
			if (!in)
				err(2, "%s", filelist.names[nextfile]);
			++nextfile;
			reader = seq;
		}

		status = reader(in, line, dbkey);
		if (status == 0) {
			if ((signed)line->size > bufend - buffer->data)
				pending = 1;
			else
				pending = enterkey(buffer, line,
				    bufend - (u_char *) buffer, ftbl);
			return (pending ? BUFFEND : 0);
		}
		if (status != EOF) {
			/* the reader rejected an over-long line */
			((char *) line->data)[60] = '\000';
			warnx("line too long: ignoring %.100s...",
			    (char *)line->data);
			continue;
		}
		/* end of this input: close it and move on to the next */
		FCLOSE(in);
		in = 0;
		if (flno >= 0)
			fstack[flno].fd = 0;
	}
}
/*
 * Get the next line from fd into the shared line buffer.
 *
 * fd   - stream to read
 * line - receives the line: data points at linebuf, size is its length
 *        including the record delimiter
 * key  - unused
 *
 * Returns 0 when a line was read (a missing final delimiter is added
 * with a warning), BUFFEND when the line exceeded MAXLLEN (the rest of
 * it is discarded), and EOF when nothing is left.
 */
int
seq(fd, line, key)
	FILE *fd;
	DBT *key, *line;
{
	static char *buf, flag = 1;
	register char *end, *pos;
	register int c;

	if (flag) {
		flag = 0;
		buf = (char *) linebuf;
	}
	/*
	 * These must be set on every call: `end' is an automatic variable
	 * and does not survive from the first call, and the caller may pass
	 * a different DBT.
	 */
	end = buf + MAXLLEN;
	line->data = buf;
	pos = buf;
	while ((c = getc(fd)) != EOF) {
		if ((*pos++ = c) == REC_D) {
			line->size = pos - buf;
			return (0);
		}
		if (pos == end) {
			/* buffer full: truncate, then skip to the delimiter */
			line->size = MAXLLEN;
			*--pos = REC_D;
			while ((c = getc(fd)) != EOF) {
				if (c == REC_D)
					return (BUFFEND);
			}
		}
	}
	if (pos != buf) {
		warnx("last character not record delimiter");
		*pos++ = REC_D;
		line->size = pos - buf;
		return (0);
	} else
		return (EOF);
}
/*
 * Write a whole record (header followed by rec->length bytes of data)
 * to a temporary file.  Write errors are handled by EWRITE.
 */
void
putrec(rec, fd)
	register struct recheader *rec;
	register FILE *fd;
{
	size_t nbytes;

	nbytes = rec->length + sizeof(TRECHEADER);
	EWRITE(rec, 1, nbytes, fd);
}
/*
 * Write the part of a record's data that starts at rec->offset and runs
 * to rec->length to the output.  Write errors are handled by EWRITE.
 */
void
putline(rec, fd)
	register struct recheader *rec;
	register FILE *fd;
{
	size_t nbytes;

	nbytes = rec->length - rec->offset;
	EWRITE(rec->data + rec->offset, 1, nbytes, fd);
}
/*
 * Read the next record from temporary file fstack[flno].  (Used by
 * merge sort.)
 *
 * Returns 0 when a record was stored at rec, BUFFEND when it does not
 * fit before end (the header is pushed back onto the stream), and EOF
 * when the file is exhausted, in which case it is closed and its
 * fstack slot cleared.  filelist, nfiles and dummy2 are unused.
 */
int
geteasy(flno, filelist, nfiles, rec, end, dummy2)
	int flno, nfiles;
	union f_handle filelist;
	register struct recheader *rec;
	register u_char *end;
	struct field *dummy2;
{
	FILE *fp;
	int k;

	fp = fstack[flno].fd;
	if ((u_char *) rec > end - sizeof(TRECHEADER))
		return (BUFFEND);
	if (fread(rec, 1, sizeof(TRECHEADER), fp) == 0) {
		fclose(fp);
		fstack[flno].fd = 0;
		return (EOF);
	}
	if (end - rec->data >= rec->length) {
		fread(rec->data, rec->length, 1, fp);
		return (0);
	}
	/* no room for the data: un-read the header, last byte first */
	k = sizeof(TRECHEADER);
	while (k-- > 0)
		ungetc(((char *) rec)[k], fp);
	return (BUFFEND);
}

286
usr.bin/sort/fsort.c Normal file
View File

@ -0,0 +1,286 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
static char sccsid[] = "@(#)fsort.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
/*
* Read in the next bin. If it fits in one segment sort it;
* otherwise refine it by segment deeper by one character,
* and try again on smaller bins. Sort the final bin at this level
* of recursion to keep the head of fstack at 0.
* After PANIC passes, abort to merge sort.
*/
#include "sort.h"
#include "fsort.h"
#include <stdlib.h>
#include <string.h>
/*
 * Shared work areas, allocated on first use: fsort() allocates buffer
 * (BUFSIZE bytes), keylist (MAXNUM pointers) and, unless SINGL_FLD,
 * linebuf (MAXLLEN bytes).
 */
u_char **keylist = 0, *buffer = 0, *linebuf = 0;
/* Table of temporary files, indexed from the .top of a union f_handle. */
struct tempfile fstack[MAXFCT];
extern char *toutpath;
/*
 * PANIC is the number of refinement passes fsort() makes at one level
 * before it gives up on radix binning and merges instead.
 */
#define FSORTMAX 4
int PANIC = FSORTMAX;
/*
 * Sort one bin of records and write the result to outfd.
 *
 * binno   - bin to sort; negative on the initial call, in which case
 *           records are read from the input files through makeline()
 *           (SINGL_FLD) or makekey(); otherwise they come from the
 *           temporary files through getnext()
 * depth   - offset into each key of the byte used for binning
 * infiles - input handle: file names on the initial call, otherwise
 *           the .top index of the temporary files in fstack
 * nfiles  - number of input files
 * outfd   - stream receiving the sorted output
 * ftbl    - field table; ftbl[0] supplies the weights and flags
 *
 * If everything read fits in the buffer it is radix sorted and written
 * directly.  Otherwise each buffer-load is split into bins on disk by
 * onepass(), the bins are sorted by recursive calls, and the largest
 * (or last) bin is handled by another trip round the loop.  After PANIC
 * such trips the remaining data is sorted in buffer-sized runs that are
 * merged with fmerge().  Allocation failures are fatal (err(2, NULL)).
 */
void
fsort(binno, depth, infiles, nfiles, outfd, ftbl)
	register int binno, depth, nfiles;
	register union f_handle infiles;
	FILE *outfd;
	register struct field *ftbl;
{
	register u_char *bufend, **keypos, *tmpbuf;
	u_char *weights;
	int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
	register int c, nelem;
	long sizes [NBINS+1];
	union f_handle tfiles, mstart = {MAXFCT-16};
	register int (*get)(int, union f_handle, int, RECHEADER *,
	    u_char *, struct field *);
	register struct recheader *crec;
	struct field tfield[2];
	FILE *prevfd, *tailfd[FSORTMAX+1];

	memset(tailfd, 0, sizeof(tailfd));
	prevfd = outfd;
	/* tfield: a plain byte-order field used to break ties (see below) */
	memset(tfield, 0, sizeof(tfield));
	if (ftbl[0].flags & R)
		tfield[0].weights = Rascii;
	else
		tfield[0].weights = ascii;
	tfield[0].icol.num = 1;
	weights = ftbl[0].weights;
	if (!buffer) {
		buffer = malloc(BUFSIZE);
		keylist = malloc(MAXNUM * sizeof(u_char *));
		if (!buffer || !keylist)
			err(2, NULL);
		if (!SINGL_FLD) {
			linebuf = malloc(MAXLLEN);
			if (!linebuf)
				err(2, NULL);
		}
	}
	bufend = buffer + BUFSIZE;
	if (binno >= 0) {
		tfiles.top = infiles.top + nfiles;
		get = getnext;
	} else {
		tfiles.top = 0;
		if (SINGL_FLD)
			get = makeline;
		else
			get = makekey;
	}
	for (;;) {
		memset(sizes, 0, sizeof(sizes));
		c = ntfiles = 0;
		if (binno == weights[REC_D] &&
		    !(SINGL_FLD && ftbl[0].flags & F)) {	/* pop */
			rd_append(weights[REC_D],
			    infiles, nfiles, prevfd, buffer, bufend);
			break;
		} else if (binno == weights[REC_D]) {
			depth = 0;		/* start over on flat weights */
			ftbl = tfield;
			weights = ftbl[0].weights;
		}
		while (c != EOF) {
			/* fill the buffer with as many records as fit */
			keypos = keylist;
			nelem = 0;
			crec = (RECHEADER *) buffer;
			while((c = get(binno, infiles, nfiles, crec, bufend,
			    ftbl)) == 0) {
				*keypos++ = crec->data + depth;
				if (++nelem == MAXNUM) {
					c = BUFFEND;
					break;
				}
				crec =(RECHEADER *) ((char *) crec +
				    SALIGN(crec->length) + sizeof(TRECHEADER));
			}
			if (c == BUFFEND || ntfiles || mfct) {	/* push */
				if (panic >= PANIC) {
					/* write a sorted run for merging */
					fstack[MAXFCT-16+mfct].fd = ftmp();
					if (radixsort(keylist, nelem, weights,
					    REC_D))
						err(2, NULL);
					append(keylist, nelem, depth, fstack[
					    MAXFCT-16+mfct].fd, putrec, ftbl);
					mfct++;
					/* reduce number of open files */
					if (mfct == 16 ||(c == EOF && ntfiles)) {
						/*
						 * fmerge() uses the buffer;
						 * save the record that is
						 * waiting in it.
						 */
						tmpbuf = malloc(bufend -
						    crec->data);
						if (!tmpbuf)
							err(2, NULL);
						memmove(tmpbuf, crec->data,
						    bufend - crec->data);
						fstack[tfiles.top + ntfiles].fd
						    = ftmp();
						fmerge(0, mstart, mfct, geteasy,
						    fstack[tfiles.top+ntfiles].fd,
						    putrec, ftbl);
						++ntfiles;
						mfct = 0;
						memmove(crec->data, tmpbuf,
						    bufend - crec->data);
						free(tmpbuf);
					}
				} else {
					/* split this buffer-load into bins */
					fstack[tfiles.top + ntfiles].fd= ftmp();
					onepass(keylist, depth, nelem, sizes,
					    weights, fstack[tfiles.top+ntfiles].fd);
					++ntfiles;
				}
			}
		}
		get = getnext;
		if (!ntfiles && !mfct) {	/* everything in memory--pop */
			if (nelem > 1)
				if (radixsort(keylist, nelem, weights, REC_D))
					err(2, NULL);
			append(keylist, nelem, depth, outfd, putline, ftbl);
			break;					/* pop */
		}
		if (panic >= PANIC) {
			if (!ntfiles)
				fmerge(0, mstart, mfct, geteasy,
				    outfd, putline, ftbl);
			else
				fmerge(0, tfiles, ntfiles, geteasy,
				    outfd, putline, ftbl);
			break;
		}
		total = maxb = lastb = 0;	/* find if one bin dominates */
		for (i = 0; i < NBINS; i++)
			if (sizes[i]) {
				if (sizes[i] > sizes[maxb])
					maxb = i;
				lastb = i;
				total += sizes[i];
			}
		if (sizes[maxb] < max((total / 2) , BUFSIZE))
			maxb = lastb;	/* otherwise pop after last bin */
		fstack[tfiles.top].lastb = lastb;
		fstack[tfiles.top].maxb = maxb;
		/* start refining next level. */
		get(-1, tfiles, ntfiles, crec, bufend, 0);	/* rewind */
		for (i = 0; i < maxb; i++) {
			if (!sizes[i])	/* bin empty; step ahead file offset */
				get(i, tfiles, ntfiles, crec, bufend, 0);
			else
				fsort(i, depth+1, tfiles, ntfiles, outfd, ftbl);
		}
		if (lastb != maxb) {
			/*
			 * Bins after maxb are sorted before maxb itself, so
			 * their output is parked in a temporary file and
			 * appended to outfd at the end.
			 */
			if (prevfd != outfd)
				tailfd[panic] = prevfd;
			prevfd = ftmp();
			for (i = maxb+1; i <= lastb; i++)
				if (!sizes[i])
					get(i, tfiles, ntfiles, crec, bufend,0);
				else
					fsort(i, depth+1, tfiles, ntfiles,
					    prevfd, ftbl);
		}
		/* sort biggest (or last) bin at this level */
		depth++;
		panic++;
		binno = maxb;
		infiles.top = tfiles.top;	/* getnext will free tfiles, */
		nfiles = ntfiles;		/* so overwrite them */
	}
	if (prevfd != outfd) {
		concat(outfd, prevfd);
		fclose(prevfd);
	}
	for (i = panic; i >= 0; --i)
		if (tailfd[i]) {
			concat(outfd, tailfd[i]);
			fclose(tailfd[i]);
		}
}
/*
 * This is one pass of radix exchange, dumping the bins to disk.
 *
 * a     - n key pointers, each pointing `depth' bytes into a record's
 *         data; on return they point at the record headers instead
 * depth - offset of the byte to bin on
 * n     - number of keys
 * sizes - running per-bin byte totals, updated for this pass
 * tr    - weight table translating a key byte into its bin number
 * fd    - temporary file receiving the bins
 *
 * The keys are permuted in place so that each bin is contiguous; then,
 * for every one of the 256 bins, the bin's size in bytes (record data
 * plus headers) is written as a long, followed by its records.
 */
#define swap(a, b, t) t = a, a = b, b = t
void
onepass(a, depth, n, sizes, tr, fd)
	u_char **a;
	int depth;
	long n, sizes[];
	u_char *tr;
	FILE *fd;
{
	long tsizes[NBINS+1];
	u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp;
	static int histo[256];	/* all zero on entry and again on exit */
	int *hp;
	register int c;
	u_char **an, *t, **aj;
	register u_char **ak, *r;

	memset(tsizes, 0, sizeof(tsizes));
	depth += sizeof(TRECHEADER);
	an = a + n;
	/* count the keys per bin and step each pointer back to its header */
	for (ak = a; ak < an; ak++) {
		histo[c = tr[**ak]]++;
		tsizes[c] += ((RECHEADER *) (*ak -= depth))->length;
	}
	/* turn the counts into bin boundaries, clearing histo for next time */
	bin[0] = a;
	bpmax = bin + 256;
	tp = top, hp = histo;
	for (bp = bin; bp < bpmax; bp++) {
		*tp++ = *(bp+1) = *bp + (c = *hp);
		*hp++ = 0;
	}
	/* permute the pointers in place so that each bin is contiguous */
	for (aj = a; aj < an; *aj = r, aj = bin[c+1])
		for (r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;)
			swap(*ak, r, t);
	for (ak = a, c = 0; c < 256; c++) {
		an = bin[c+1];
		n = an - ak;
		tsizes[c] += n * sizeof(TRECHEADER);
		/* tell getnext how many elements in this bin, this segment. */
		EWRITE(tsizes+c, sizeof(long), 1, fd);
		sizes[c] += tsizes[c];
		for (; ak < an; ++ak)
			putrec((RECHEADER *) *ak, fd);
	}
}

60
usr.bin/sort/fsort.h Normal file
View File

@ -0,0 +1,60 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fsort.h 8.1 (Berkeley) 6/6/93
*/
#define POW 20 /* exponent for buffer size */
/* size in bytes of the main sort buffer */
#define BUFSIZE (1 << POW)
#define MAXNUM (BUFSIZE/10) /* lowish guess at average record size */
/* status returned by the record readers when the record does not fit */
#define BUFFEND (EOF-2)
/* number of slots in fstack */
#define MAXFCT 1000
/* longest line handled; also the size of linebuf */
#define MAXLLEN ((1 << min(POW-4, 16)) - 14)
extern u_char **keylist, **l2buf, *buffer, *linebuf;
/* temp files in the stack have a file descriptor, a largest bin (maxb)
* which becomes the last non-empty bin (lastb) when the actual largest
* bin is smaller than max(half the total file, BUFSIZE)
* Max_o is the offset of maxb so it can be sought after the other bins
* are sorted.
*/
struct tempfile {
/* open stream; cleared to 0 by the readers once the file is exhausted */
FILE *fd;
/* bin that fsort() sorts last at this level */
u_char maxb;
/* highest-numbered non-empty bin */
u_char lastb;
/* file offset of bin maxb, accumulated by getnext() */
long max_o;
};
extern struct tempfile fstack[MAXFCT];

326
usr.bin/sort/init.c Normal file
View File

@ -0,0 +1,326 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
static char sccsid[] = "@(#)init.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
#include "sort.h"
#include <ctype.h>
#include <string.h>
/* Column table and its entry count; insertcol() keeps it ordered by column number. */
extern struct coldesc clist[(ND+1)*2];
extern int ncols;
/* Weight table filled in by settables() according to the global flags. */
u_char gweights[NBINS];
/*
* clist (list of columns which correspond to one or more icol or tcol)
* is in increasing order of columns.
* Fields are kept in increasing order of fields.
*/
/*
 * Enter column number `num' into clist, which is kept in increasing
 * order of column number.  Nothing happens if it is already present.
 */
static void
insertcol_num(num)
	int num;
{
	int slot;

	slot = 0;
	while (slot < ncols && num > clist[slot].num)
		slot++;
	if (num == clist[slot].num)
		return;
	/* open a gap at `slot' and claim it */
	memmove(clist+slot+1, clist+slot, sizeof(COLDESC)*(ncols-slot));
	clist[slot].num = num;
	ncols++;
}

/*
 * keep clist in order--inserts the start column of a field and, when
 * it has one that differs, its end column
 */
static void
insertcol(field)
	struct field *field;
{
	insertcol_num(field->icol.num);
	if (field->tcol.num && field->tcol.num != field->icol.num)
		insertcol_num(field->tcol.num);
}
/*
 * Point every field's start and end column at the clist entry with the
 * same column number--n^2 but who cares?  fldtab[0] gets the last
 * clist entry as its end column; the remaining fields are walked until
 * one with a zero start column number is found.
 */
void
fldreset(fldtab)
	struct field *fldtab;
{
	struct field *fp;
	int k;

	fldtab[0].tcol.p = clist+ncols-1;
	for (fp = fldtab + 1; fp->icol.num; fp++) {
		k = 0;
		while (clist[k].num != fp->icol.num)
			k++;
		fp->icol.p = &clist[k];
		if (fp->tcol.num) {
			k = 0;
			while (clist[k].num != fp->tcol.num)
				k++;
			fp->tcol.p = &clist[k];
		}
	}
}
/*
 * Interpret one column specification, "num[.indent][flags]", of a -k
 * field.
 *
 * pos     - text to parse
 * cur_fld - field being built; the start column is filled in if it is
 *           still unset (num == 0), otherwise the end column
 * gflag   - unused
 *
 * Returns a pointer to the first character not consumed.  Exits via
 * errx() on a non-positive field number, an offset on an end column of
 * 0, or a negative offset.
 *
 * The number of characters consumed is taken from sscanf's %n
 * conversion.  sscanf's return value is an item count (or EOF) and
 * must not be added to the pointer.
 */
char *
setcolumn(pos, cur_fld, gflag)
	char *pos;
	struct field *cur_fld;
	int gflag;
{
	struct column *col;
	int tmp, used;

	col = cur_fld->icol.num ? (&(*cur_fld).tcol) : (&(*cur_fld).icol);
	used = 0;
	if (sscanf(pos, "%d%n", &(col->num), &used) == 1)
		pos += used;
	/* only an end column may be 0, meaning "end of line" */
	if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
		errx(2, "field numbers must be positive");
	if (*pos == '.') {
		if (!col->num)
			errx(2, "cannot indent end of line");
		++pos;
		used = 0;
		if (sscanf(pos, "%d%n", &(col->indent), &used) == 1)
			pos += used;
		/* start offsets are given 1-based but stored 0-based */
		if (&cur_fld->icol == col)
			col->indent--;
		if (col->indent < 0)
			errx(2, "illegal offset");
	}
	/* collect the option letters that follow */
	while ((tmp = optval(*pos, cur_fld->tcol.num)) != 0) {
		cur_fld->flags |= tmp;
		pos++;
	}
	if (cur_fld->icol.num == 0)
		cur_fld->icol.num = 1;
	return (pos);
}
/*
 * Parse a complete -k argument, "start[,end]", into cur_fld and select
 * its weight and mask tables.
 *
 * pos     - text of the key specification
 * cur_fld - field to fill in
 * gflag   - global flags; used when the key names none of its own
 *
 * Returns the field's end column number (0 when the key runs to the
 * end of the line).  Exits via errx() on a malformed specification or
 * when the limit of ND-1 keys is exceeded.
 */
int
setfield(pos, cur_fld, gflag)
	char *pos;
	struct field *cur_fld;
	int gflag;
{
	static int nfields = 0;
	char *setcolumn();
	int opts, flipped;

	nfields++;
	if (nfields == ND)
		errx(2, "too many sort keys. (Limit is %d)", ND-1);

	cur_fld->weights = ascii;
	cur_fld->mask = alltable;

	pos = setcolumn(pos, cur_fld, gflag);
	if (*pos == '\0') {
		/* key extends to EOL. */
		cur_fld->tcol.num = 0;
	} else if (*pos == ',') {
		setcolumn(pos + 1, cur_fld, gflag);
	} else {
		errx(2, "illegal field descriptor");
	}

	if (!cur_fld->flags)
		cur_fld->flags = gflag;
	opts = cur_fld->flags;

	/*
	 * Assign appropriate mask table and weight table.
	 * If the global weights are reversed, the local field
	 * must be "re-reversed".
	 */
	flipped = (opts & R) ^ (gflag & R);
	if (opts & F)
		cur_fld->weights = flipped ? RFtable : Ftable;
	else if (flipped)
		cur_fld->weights = Rascii;

	if (opts & I)
		cur_fld->mask = itable;
	else if (opts & D)
		cur_fld->mask = dtable;

	cur_fld->flags |= (gflag & (BI | BT));
	if (!cur_fld->tcol.indent)	/* BT has no meaning at end of field */
		cur_fld->flags &= (D|F|I|N|R|BI);

	if (cur_fld->tcol.num != 0 &&
	    ((cur_fld->flags & BI) || !(cur_fld->flags & BT)) &&
	    cur_fld->tcol.num <= cur_fld->icol.num &&
	    cur_fld->tcol.indent < cur_fld->icol.indent)
		errx(2, "fields out of order");

	insertcol(cur_fld);
	return (cur_fld->tcol.num);
}
/*
 * Map a key option letter to its flag bit.
 *
 * desc     - option character
 * tcolflag - nonzero when the letter follows an end column, which makes
 *            'b' mean BT rather than BI
 *
 * Returns the flag, or 0 if desc is not an option letter.
 */
int
optval(desc, tcolflag)
	int desc, tcolflag;
{
	if (desc == 'b')
		return (tcolflag ? BT : BI);
	if (desc == 'd')
		return (D);
	if (desc == 'f')
		return (F);
	if (desc == 'i')
		return (I);
	if (desc == 'n')
		return (N);
	if (desc == 'r')
		return (R);
	return (0);
}
/*
 * Parse a decimal integer at *sp and step *sp past it.
 * Returns the value, or 0 (leaving *sp alone) if there is none.
 */
static int
fixit_num(sp)
	char **sp;
{
	int val = 0, used = 0;

	if (sscanf(*sp, "%d%n", &val, &used) == 1)
		*sp += used;
	return (val);
}

/*
 * Copy string src (with its NUL) to dst, which ends at dstend, and
 * return a pointer to the copied NUL.  Exits if src does not fit.
 */
static char *
fixit_append(dst, dstend, src)
	char *dst, *dstend;
	const char *src;
{
	size_t len = strlen(src);

	if (len >= (size_t)(dstend - dst))
		errx(2, "too many +pos/-pos key arguments");
	memcpy(dst, src, len + 1);
	return (dst + len);
}

/*
 * Rewrite obsolescent "+pos1 [-pos2]" key arguments as "-kpos1,pos2".
 *
 * argc - pointer to the argument count; reduced by one for every
 *        "-pos2" that is folded into the preceding key
 * argv - argument vector, edited in place; rewritten entries point
 *        into a static buffer
 *
 * +f.c becomes -k(f+1).(c+1); -f becomes an end of f, and -f.c (c != 0)
 * an end of (f+1).c.  Option letters following a position are kept.
 * Each rewritten argument is a separate NUL-terminated string.
 */
void
fixit(argc, argv)
	int *argc;
	char **argv;
{
	static char vbuf[ND*20];
	char num[32];
	char *vpos, *vend, *tpos;
	int i, j, v, w, x;

	vpos = vbuf;
	vend = vbuf + sizeof(vbuf);
	for (i = 1; i < *argc; i++) {
		if (argv[i][0] != '+')
			continue;
		tpos = argv[i] + 1;
		argv[i] = vpos;
		v = fixit_num(&tpos);
		snprintf(num, sizeof(num), "-k%d", v + 1);
		vpos = fixit_append(vpos, vend, num);
		if (*tpos == '.') {
			++tpos;
			x = fixit_num(&tpos);
			snprintf(num, sizeof(num), ".%d", x + 1);
			vpos = fixit_append(vpos, vend, num);
		}
		vpos = fixit_append(vpos, vend, tpos);	/* option letters */
		vpos = fixit_append(vpos, vend, ",");
		if (argv[i+1] && argv[i+1][0] == '-' &&
		    isdigit((unsigned char)argv[i+1][1])) {
			tpos = argv[i+1] + 1;
			w = fixit_num(&tpos);
			x = 0;
			if (*tpos == '.') {
				++tpos;
				x = fixit_num(&tpos);
			}
			if (x)
				snprintf(num, sizeof(num), "%d.%d", w + 1, x);
			else
				snprintf(num, sizeof(num), "%d", w);
			vpos = fixit_append(vpos, vend, num);
			vpos = fixit_append(vpos, vend, tpos);
			/* close the gap left by the consumed -pos2 */
			for (j = i+1; j < *argc; j++)
				argv[j] = argv[j+1];
			*argc -= 1;
		}
		/* keep the NUL so the next key does not run into this one */
		vpos++;
	}
}
/*
 * Build the weight tables (ascii, Rascii, Ftable, RFtable, gweights)
 * and the mask tables (alltable, itable, dtable).
 *
 * ascii, Rascii, Ftable, and RFtable map
 * REC_D -> REC_D; {not REC_D} -> {not REC_D}.
 * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
 * Note: when sorting in forward order, to encode character zero in a key,
 * use \001\001; character 1 becomes \001\002. In this case, character 0
 * is reserved for the field delimiter. Analogously for -r (fld_d = 255).
 * Note: this is only good for ASCII sorting. For different LC 's,
 * all bets are off. See also num_init in number.c
 */
void
settables(gflags)
	int gflags;
{
	u_char *chosen;
	int ch, step, plain;

	for (ch = 0; ch < 256; ch++) {
		ascii[ch] = ch;
		Rascii[ch] = (ch > REC_D && ch < 255 - REC_D+1) ?
		    255 - ch + 1 : 255 - ch;
		if (islower(ch)) {
			/* fold onto the entry already built for upper case */
			Ftable[ch] = Ftable[ch - ('a' - 'A')];
			RFtable[ch] = RFtable[ch - ('a' - 'A')];
		} else if (REC_D>= 'A' && REC_D < 'Z' && ch < 'a' && ch > REC_D) {
			Ftable[ch] = ch + 1;
			RFtable[ch] = Rascii[ch] - 1;
		} else {
			Ftable[ch] = ch;
			RFtable[ch] = Rascii[ch];
		}
		alltable[ch] = 1;
		itable[ch] = (ch == '\n' || isprint(ch)) ? 1 : 0;
		dtable[ch] = (ch == '\n' || ch == '\t' || ch == ' ' ||
		    isalnum(ch)) ? 1 : 0;
	}
	Rascii[REC_D] = RFtable[REC_D] = REC_D;
	if (REC_D >= 'A' && REC_D < 'Z')
		++Ftable[REC_D + ('a' - 'A')];

	/* pick the table the global weights are derived from */
	plain = (!(gflags & F) || !SINGL_FLD);
	if (plain)
		chosen = (gflags & R) ? Rascii : ascii;
	else
		chosen = (gflags & R) ? RFtable : Ftable;
	memmove(gweights, chosen, sizeof(gweights));

	/* shift the entries below REC_D by one and pin REC_D to an extreme */
	step = (gflags & R) ? -1 : 1;
	for (ch = 0; ch < REC_D; ch++)
		gweights[ch] += step;
	gweights[REC_D] = ((gflags & R) ? 255 : 0);
	if (SINGL_FLD && gflags & F) {
		for (ch = 0; ch < REC_D; ch++) {
			ascii[ch] += step;
			Rascii[ch] += step;
		}
		ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
	}
}

304
usr.bin/sort/msort.c Normal file
View File

@ -0,0 +1,304 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
static char sccsid[] = "@(#)msort.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
#include "sort.h"
#include "fsort.h"
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Subroutines using comparisons: merge sort and check order */
/* value of the last argument to insert(); merge() passes !DELETE while loading */
#define DELETE (1)
/* round n up to a multiple of 4 */
#define LALIGN(n) ((n+3) & ~3)
/*
 * One merge input: `end' marks the end of the space available for the
 * record (merge() sets it to rec->data + MAXLLEN), `flno' is the index
 * of the source file in fstack, and `rec' is the current record.
 */
typedef struct mfile {
u_char *end;
short flno;
struct recheader rec[1];
} MFILE;
/* Same layout with a struct trecheader; used below for size computations. */
typedef struct tmfile {
u_char *end;
short flno;
struct trecheader rec[1];
} TMFILE;
/*
 * wts: weight table of the first field, set by fmerge().
 * wts1: set by fmerge() only when !UNIQUE && SINGL_FLD and the field has F.
 */
u_char *wts, *wts1 = 0;
/* scratch MFILE, allocated once by fmerge() */
struct mfile *cfilebuf;
static int cmp __P((struct recheader *, struct recheader *));
static int insert __P((struct mfile **, struct mfile **, int, int));
/*
 * Merge nfiles sorted inputs into outfd, at most 16 at a time.
 *
 * binno  - negative when `files' holds file names to be opened;
 *          otherwise the inputs are the temporary files starting at
 *          fstack[files.top]
 * files  - input handle (names or .top index, see binno)
 * nfiles - number of inputs
 * get    - function used to read a record from an input
 * outfd  - stream receiving the final merged output
 * fput   - function used to write a record to outfd
 * ftbl   - field table; ftbl->weights and ftbl->flags select the
 *          comparison weights
 *
 * When there are more than 16 inputs, each group of 16 is merged into
 * a temporary file with putrec and the pass is repeated over those
 * files (read back with geteasy) until one group remains.  Allocation
 * failures are fatal (err(2, NULL)).
 */
void
fmerge(binno, files, nfiles, get, outfd, fput, ftbl)
	union f_handle files;
	int binno, nfiles;
	int (*get)();
	FILE *outfd;
	void (*fput)();
	struct field *ftbl;
{
	FILE *tout;
	int i, j, last;
	void (*put)(struct recheader *, FILE *);
	extern int geteasy();
	struct tempfile *l_fstack;

	wts = ftbl->weights;
	if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
		wts1 = (ftbl->flags & R) ? Rascii : ascii;
	if (!cfilebuf) {
		cfilebuf = malloc(MAXLLEN + sizeof(TMFILE));
		if (!cfilebuf)
			err(2, NULL);
	}
	/* one maximum-length record slot per simultaneously merged input */
	i = min(16, nfiles) * LALIGN(MAXLLEN+sizeof(TMFILE));
	if (!buffer || i > BUFSIZE) {
		buffer = buffer ? realloc(buffer, i) : malloc(i);
		if (!buffer)
			err(2, NULL);
		/* linebuf has a fixed size; allocate it once, never again */
		if (!SINGL_FLD && !linebuf) {
			linebuf = malloc(MAXLLEN);
			if (!linebuf)
				err(2, NULL);
		}
	}
	if (binno >= 0)
		l_fstack = fstack + files.top;
	else
		l_fstack = fstack;
	while (nfiles) {
		put = putrec;
		for (j = 0; j < nfiles; j += 16) {
			if (nfiles <= 16) {
				/* last pass: write straight to the output */
				tout = outfd;
				put = fput;
			}
			else
				tout = ftmp();
			last = min(16, nfiles - j);
			if (binno < 0) {
				for (i = 0; i < last; i++)
					if (!(l_fstack[i+MAXFCT-1-16].fd =
					    fopen(files.names[j + i], "r")))
						err(2, "%s", files.names[j+i]);
				merge(MAXFCT-1-16, last, get, tout, put, ftbl);
			}
			else {
				for (i = 0; i< last; i++)
					rewind(l_fstack[i+j].fd);
				merge(files.top+j, last, get, tout, put, ftbl);
			}
			/* the group's output is an input of the next pass */
			if (nfiles > 16) l_fstack[j/16].fd = tout;
		}
		nfiles = (nfiles + 15) / 16;
		if (nfiles == 1)
			nfiles = 0;
		if (binno < 0) {
			/* from here on the inputs are temporary files */
			binno = 0;
			get = geteasy;
			files.top = 0;
		}
	}
}
/*
 * Merge up to 16 sorted inputs, numbered infl0 .. infl0+nfiles-1, to outfd.
 *
 * Phase 1 reads one record from each input into its slot in `buffer` and
 * inserts the slot into flist, which is kept sorted; inputs that hit EOF
 * immediately are dropped.  With UNIQUE set, insert() returns 1 for a
 * duplicate and the next record is read from the same input.
 *
 * Phase 2 repeatedly reads the next record from the input that owns
 * flist[0] into the spare slot cfilebuf; insert(..., DELETE) exchanges it
 * with the current head, which is then written with put.  When an input is
 * exhausted its last record (flist[0]) is written and removed from the list.
 */
void
merge(infl0, nfiles, get, outfd, put, ftbl)
	int infl0, nfiles;
	int (*get)();
	void (*put)(struct recheader *, FILE *);
	FILE *outfd;
	struct field *ftbl;
{
	int c, i, j;
	union f_handle dummy = {0};
	struct mfile *flist[16], *cfile;

	for (i = j = 0; i < nfiles; i++) {
		cfile = (MFILE *) (buffer +
		    i * LALIGN(MAXLLEN + sizeof(TMFILE)));
		cfile->flno = j + infl0;
		cfile->end = cfile->rec->data + MAXLLEN;
		for (c = 1; c == 1;) {
			if (EOF == (c = get(j+infl0, dummy, nfiles,
			    cfile->rec, cfile->end, ftbl))) {
				/* empty input: reuse this slot for the next one */
				--i;
				--nfiles;
				break;
			}
			if (i)
				c = insert(flist, &cfile, i, !DELETE);
			else
				flist[0] = cfile;
		}
		j++;
	}

	/*
	 * Every input was empty: flist[0] was never set, so there is nothing
	 * to merge and it must not be dereferenced below.
	 */
	if (nfiles == 0)
		return;

	cfile = cfilebuf;
	cfile->flno = flist[0]->flno;
	cfile->end = cfile->rec->data + MAXLLEN;
	while (nfiles) {
		for (c = 1; c == 1;) {
			if (EOF == (c = get(cfile->flno, dummy, nfiles,
			    cfile->rec, cfile->end, ftbl))) {
				put(flist[0]->rec, outfd);
				memmove(flist, flist + 1,
				    sizeof(MFILE *) * (--nfiles));
				cfile->flno = flist[0]->flno;
				break;
			}
			if (!(c = insert(flist, &cfile, nfiles, DELETE)))
				put(cfile->rec, outfd);
		}
	}
}
/*
 * Place *rec into the sorted list flist[0 .. ttop-1] using a binary search
 * driven by cmp().
 *
 * delete != 0: *rec is inserted, the old head flist[0] is removed from the
 *	list and handed back through *rec, and the returned slot's flno is set
 *	to the file number of the new head (the file to read from next).
 * delete == 0: *rec is simply inserted; the list grows by one entry.
 *
 * Returns 1, without modifying the list, when UNIQUE is set and *rec
 * compares equal to an existing entry; returns 0 otherwise.
 */
static int
insert(flist, rec, ttop, delete)
	struct mfile **flist, **rec;
	int delete, ttop;			/* delete = 0 or 1 */
{
	register struct mfile *tmprec;
	register int top, mid, bot = 0, cmpv = 1;

	tmprec = *rec;
	top = ttop;
	for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
		cmpv = cmp(tmprec->rec, flist[mid]->rec);
		if (cmpv < 0)
			top = mid;
		else if (cmpv > 0)
			bot = mid;
		else {
			if (!UNIQUE)
				bot = mid - 1;
			break;
		}
	}
	if (delete) {
		if (UNIQUE) {
			/* the search loop never compares against flist[0] */
			if (!bot && cmpv)
				cmpv = cmp(tmprec->rec, flist[0]->rec);
			if (!cmpv)
				return(1);
		}
		tmprec = flist[0];
		if (bot)
			memmove(flist, flist+1, bot * sizeof(MFILE *));
		flist[bot] = *rec;
		*rec = tmprec;
		(*rec)->flno = (*flist)->flno;
		return (0);
	}
	else {
		if (!bot && !(UNIQUE && !cmpv)) {
			/* decide whether the record belongs before flist[0] */
			cmpv = cmp(tmprec->rec, flist[0]->rec);
			if (cmpv < 0)
				bot = -1;
		}
		if (UNIQUE && !cmpv)
			return (1);
		bot++;
		/* elements are MFILE pointers: size the move accordingly */
		memmove(flist + bot+1, flist + bot,
		    (ttop - bot) * sizeof(MFILE *));
		flist[bot] = *rec;
		return (0);
	}
}
/*
 * Check that one file is in sorted order (sort -c).
 *
 * Reads records pairwise with get and compares each record with its
 * predecessor.  Never returns: exits 0 when the whole file is in order,
 * exits 1 through errx() on the first out-of-order record or, with UNIQUE
 * set, on the first record equal to its predecessor, and exits 2 through
 * err() if memory cannot be allocated.
 */
void
order(infile, get, ftbl)
	union f_handle infile;
	int (*get)();
	struct field *ftbl;
{
	u_char *end;
	int c;
	struct recheader *crec, *prec, *trec;

	if (!SINGL_FLD) {
		linebuf = malloc(MAXLLEN);
		if (!linebuf)
			err(2, NULL);
	}
	/* room for two records: the current one and the previous one */
	buffer = malloc(2 * (MAXLLEN + sizeof(TRECHEADER)));
	if (!buffer)
		err(2, NULL);
	/*
	 * NOTE(review): `end` marks the end of the whole two-record buffer and
	 * is passed to get() for both halves -- confirm get() cannot overrun
	 * the first half into the second.
	 */
	end = buffer + 2 * (MAXLLEN + sizeof(TRECHEADER));
	crec = (RECHEADER *) buffer;
	prec = (RECHEADER *) (buffer + MAXLLEN + sizeof(TRECHEADER));
	wts = ftbl->weights;
	if (SINGL_FLD && ftbl->flags & F)
		wts1 = ftbl->flags & R ? Rascii : ascii;
	else
		wts1 = 0;
	if (0 == get(-1, infile, 1, prec, end, ftbl)) {
		while (0 == get(-1, infile, 1, crec, end, ftbl)) {
			if (0 < (c = cmp(prec, crec))) {
				/* terminate the line so it prints as a string */
				crec->data[crec->length-1] = 0;
				errx(1, "found disorder: %s",
				    crec->data+crec->offset);
			}
			if (UNIQUE && !c) {
				crec->data[crec->length-1] = 0;
				errx(1, "found non-uniqueness: %s",
				    crec->data+crec->offset);
			}
			/* swap buffers: the current record becomes the previous */
			trec = prec;
			prec = crec;
			crec = trec;
		}
	}
	exit(0);
}
/*
 * Compare two records byte by byte through a weight table.
 *
 * The comparison is made with wts; if the records compare equal and wts1 is
 * set (and differs from wts), it is repeated with wts1.  Only the first
 * min(offset) bytes are compared when keys are in use and UNIQUE is set;
 * otherwise the first min(length) bytes are compared.
 *
 * Returns the (signed) weight difference at the first differing byte, or 0
 * if no difference was found.
 */
static int
cmp(rec1, rec2)
	struct recheader *rec1, *rec2;
{
	register int r;
	register u_char *pos1, *pos2, *end;
	register u_char *cwts;

	for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) {
		pos1 = rec1->data;
		pos2 = rec2->data;
		if (!SINGL_FLD && UNIQUE)
			end = pos1 + min(rec1->offset, rec2->offset);
		else
			end = pos1 + min(rec1->length, rec2->length);
		for (; pos1 < end; ) {
			if ((r = cwts[*pos1++] - cwts[*pos2++]) != 0)
				return (r);
		}
	}
	return (0);
}

40
usr.bin/sort/pathnames.h Normal file
View File

@ -0,0 +1,40 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)pathnames.h 8.1 (Berkeley) 6/6/93
*/
/* Name used for standard input in the list of input files. */
#define _PATH_STDIN "/dev/stdin"
/* Default temporary-file template; the trailing X's are replaced at creation. */
#define _PATH_SORTTMP "/var/tmp/sort.XXXXXXXX"

364
usr.bin/sort/sort.1 Normal file
View File

@ -0,0 +1,364 @@
.\" Copyright (c) 1991, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" the Institute of Electrical and Electronics Engineers, Inc.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. All advertising materials mentioning features or use of this software
.\" must display the following acknowledgement:
.\" This product includes software developed by the University of
.\" California, Berkeley and its contributors.
.\" 4. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)sort.1 8.1 (Berkeley) 6/6/93
.\"
.Dd June 6, 1993
.Dt SORT 1
.Os
.Sh NAME
.Nm sort
.Nd sort or merge text files
.Sh SYNOPSIS
.Nm sort
.Op Fl cmubdfinr
.Op Fl t Ar char
.Op Fl T Ar char
.Oo
.Cm Fl k Ar field1[,field2]
.Oc
.Ar ...
.Op Fl o Ar output
.Op Ar file
.Ar ...
.Sh DESCRIPTION
The
.Nm sort
utility
sorts text files by lines.
Comparisons are based on one or more sort keys extracted
from each line of input, and are performed
lexicographically. By default, if keys are not given,
.Nm sort
regards each input line as a single field.
.Pp
The following options are available:
.Bl -tag -width indent
.It Fl c
Check that the single input file is sorted.
If the file is not sorted,
.Nm sort
produces the appropriate error messages and exits with code 1;
otherwise,
.Nm sort
returns 0.
.Nm Sort
.Fl c
produces no output.
.It Fl m
Merge only; the input files are assumed to be pre-sorted.
.It Fl o Ar output
The argument given is the name of an
.Ar output
file to
be used instead of the standard output.
This file
can be the same as one of the input files.
.It Fl u
Unique: suppress all but one in each set of lines
having equal keys.
If used with the
.Fl c
option,
check that there are no lines with duplicate keys.
.El
.Pp
The following options override the default ordering rules.
When ordering options appear independent of key field
specifications, the requested field ordering rules are
applied globally to all sort keys.
When attached to a specific key (see
.Fl k ) ,
the ordering options override
all global ordering options for that key.
.Bl -tag -width indent
.It Fl d
Only blank space and alphanumeric characters
.\" according
.\" to the current setting of LC_CTYPE
are used
in making comparisons.
.It Fl f
Considers all lowercase characters that have uppercase
equivalents to be the same for purposes of
comparison.
.It Fl i
Ignore all non-printable characters.
.It Fl n
An initial numeric string, consisting of optional
blank space, optional minus sign, and zero or more
digits (including decimal point)
.\" with
.\" optional radix character and thousands
.\" separator
.\" (as defined in the current locale),
is sorted by arithmetic value.
(The
.Fl n
option no longer implies
the
.Fl b
option.)
.It Fl r
Reverse the sense of comparisons.
.El
.Pp
The treatment of field separators can be altered using the
options:
.Bl -tag -width indent
.It Fl b
Ignores leading blank space when determining the start
and end of a restricted sort key.
A
.Fl b
option specified before the first
.Fl k
option applies globally to all
.Fl k
options.
Otherwise, the
.Fl b
option can be
attached independently to each
.Ar field
argument of the
.Fl k
option (see below).
Note that the
.Fl b
option
has no effect unless key fields are specified.
.It Fl t Ar char
.Ar Char
is used as the field separator character. The initial
.Ar char
is not considered to be part of a field when determining
key offsets (see below).
Each occurrence of
.Ar char
is significant (for example,
.Dq Ar charchar
delimits an empty field).
If
.Fl t
is not specified,
blank space characters are used as default field
separators.
.It Fl T Ar char
.Ar Char
is used as the record separator character.
This should be used with discretion;
.Fl T Ar <alphanumeric>
usually produces undesirable results.
The default line separator is newline.
.It Fl k Ar field1[,field2]
Designates the starting position,
.Ar field1 ,
and optional ending position,
.Ar field2 ,
of a key field.
The
.Fl k
option replaces the obsolescent options
.Cm \(pl Ns Ar pos1
and
.Fl Ns Ar pos2 .
.El
.Pp
The following operands are available:
.Bl -tag -width indent
.It Ar file
The pathname of a file to be sorted, merged, or checked.
If no file
operands are specified, or if
a file operand is
.Fl ,
the standard input is used.
.El
.Pp
A field is
defined as a minimal sequence of characters followed by a
field separator or a newline character.
By default, the first
blank space of a sequence of blank spaces acts as the field separator.
All blank spaces in a sequence of blank spaces are considered
as part of the next field; for example, all blank spaces at
the beginning of a line are considered to be part of the
first field.
.Pp
Fields are specified
by the
.Fl k Ar field1[,field2]
argument. A missing
.Ar field2
argument defaults to the end of a line.
.Pp
The arguments
.Ar field1
and
.Ar field2
have the form
.Em m.n
followed by one or more of the options
.Fl b , d , f , i ,
.Fl n , r .
A
.Ar field1
position specified by
.Em m.n
.Em (m,n > 0)
is interpreted as the
.Em n Ns th
character in the
.Em m Ns th
field.
A missing
.Em \&.n
in
.Ar field1
means
.Ql \&.1 ,
indicating the first character of the
.Em m Ns th
field.
If the
.Fl b
option is in effect,
.Em n
is counted from the first
non-blank character in the
.Em m Ns th
field;
.Em m Ns \&.1b
refers to the first
non-blank character in the
.Em m Ns th
field.
.Pp
A
.Ar field2
position specified by
.Em m.n
is interpreted as
the
.Em n Ns th
character (including separators) of the
.Em m Ns th
field.
A missing
.Em \&.n
indicates the last character of the
.Em m Ns th
field;
.Em m
= \&0
designates the end of a line.
Thus the option
.Fl k Ar v.x,w.y
is synonymous with the obsolescent option
.Cm \(pl Ns Ar v-\&1.x-\&1
.Fl Ns Ar w-\&1.y ;
when
.Em y
is omitted,
.Fl k Ar v.x,w
is synonymous with
.Cm \(pl Ns Ar v-\&1.x-\&1
.Fl Ns Ar w+1.0 .
The obsolescent
.Cm \(pl Ns Ar pos1
.Fl Ns Ar pos2
option is still supported, except for
.Fl Ns Ar w\&.0b ,
which has no
.Fl k
equivalent.
.Sh FILES
.Bl -tag -width Pa -compact
.It Pa /var/tmp/sort.*
Default temporary files.
.It Pa Ar output Ns #PID
Temporary name for
.Ar output
if
.Ar output
already exists.
.El
.Sh SEE ALSO
.Xr comm 1 ,
.Xr uniq 1 ,
.Xr join 1
.Sh RETURN VALUES
Sort exits with one of the following values:
.Bl -tag -width flag -compact
.It Pa 0:
normal behavior.
.It Pa 1:
on disorder (or non-uniqueness) with the
.Fl c
option
.It Pa 2:
an error occurred.
.El
.Sh BUGS
Lines longer than 65522 characters are discarded and processing continues.
To sort files larger than 60Mb, use
.Nm sort
.Fl H ;
files larger than 704Mb must be sorted in smaller pieces, then merged.
To protect data
.Nm sort
.Fl o
calls link and unlink, and thus fails in protected directories.
.Sh HISTORY
A
.Nm sort
command appeared in
.At v6 .
.Sh NOTES
The current sort command uses lexicographic radix sorting, which requires
that sort keys be kept in memory (as opposed to previous versions, which used
quick and merge sorts and did not).
Thus performance depends highly on efficient choice of sort keys, and the
.Fl b
option and the
.Ar field2
argument of the
.Fl k
option should be used whenever possible.
Similarly,
.Nm sort
.Fl k1f
is equivalent to
.Nm sort
.Fl f
and may take twice as long.

279
usr.bin/sort/sort.c Normal file
View File

@ -0,0 +1,279 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
static char copyright[] =
"@(#) Copyright (c) 1993\n\
The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */
#ifndef lint
static char sccsid[] = "@(#)sort.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
/* Sort sorts a file using an optional user-defined key.
* Sort uses radix sort for internal sorting, and allows
* a choice of merge sort and radix sort for external sorting.
*/
#include "sort.h"
#include "fsort.h"
#include "pathnames.h"
#include <paths.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Record delimiter character; newline unless changed with -T. */
int REC_D = '\n';
u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
/*
* weight tables. Gweights is one of ascii, Rascii..
* modified to weight rec_d = 0 (or 255)
*/
extern u_char gweights[NBINS];
u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
/*
* masks of ignored characters. Alltable is 256 ones
*/
u_char dtable[NBINS], itable[NBINS], alltable[NBINS];
/*
* SINGL_FLD: the whole line is the only key (no -k keys, no -d or -i).
* SEP_FLAG: a field separator was given with -t.
* UNIQUE: -u was given.
*/
int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
struct coldesc clist[(ND+1)*2];
int ncols = 0;
/* NOTE(review): these two externs repeat the definitions just above. */
extern struct coldesc clist[(ND+1)*2];
extern int ncols;
/* File name substituted for "-" in the list of input files. */
char devstdin[] = _PATH_STDIN;
/* Output path built by main(): the stdout device name or a temporary file. */
char toutpath[_POSIX_PATH_MAX];
static void cleanup __P((void));
static void onsig __P((int));
static void usage __P((char *));
int
main(argc, argv)
int argc;
char *argv[];
{
extern int optind;
extern char *optarg;
int (*get)();
int ch, i, stdinflag = 0, tmp = 0;
char cflag = 0, mflag = 0, nflag = 0;
char *outfile, *outpath = 0;
struct field fldtab[ND+2], *ftpos;
union f_handle filelist;
FILE *outfd;
memset(fldtab, 0, (ND+2)*sizeof(struct field));
memset(d_mask, 0, NBINS);
d_mask[REC_D = '\n'] = REC_D_F;
SINGL_FLD = SEP_FLAG = 0;
d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
ftpos = fldtab;
fixit(&argc, argv);
while ((ch = getopt(argc, argv, "bcdfik:mHno:rt:T:ux")) != EOF) {
switch (ch) {
case 'b': fldtab->flags |= BI | BT;
break;
case 'd':
case 'i':
case 'f':
case 'r': tmp |= optval(ch, 0);
if (tmp & R && tmp & F)
fldtab->weights = RFtable;
else if (tmp & F)
fldtab->weights = Ftable;
else if(tmp & R)
fldtab->weights = Rascii;
fldtab->flags |= tmp;
break;
case 'o':
outpath = optarg;
break;
case 'n':
nflag = 1;
setfield("1n", ++ftpos, fldtab->flags&(~R));
break;
case 'k':
setfield(optarg, ++ftpos, fldtab->flags);
break;
case 't':
if (SEP_FLAG)
usage("multiple field delimiters");
SEP_FLAG = 1;
d_mask[' '] &= ~FLD_D;
d_mask['\t'] &= ~FLD_D;
d_mask[*optarg] |= FLD_D;
if (d_mask[*optarg] & REC_D_F)
err(2, "record/field delimiter clash");
break;
case 'T':
if (REC_D != '\n')
usage("multiple record delimiters");
if ('\n' == (REC_D = *optarg))
break;
d_mask['\n'] = d_mask[' '];
d_mask[REC_D] = REC_D_F;
break;
case 'u':
UNIQUE = 1;
break;
case 'c':
cflag = 1;
break;
case 'm':
mflag = 1;
break;
case 'H':
PANIC = 0;
break;
case '?':
default: usage("");
}
}
if (cflag && argc > optind+1)
errx(2, "too many input files for -c option");
if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
outpath = argv[argc-1];
argc -= 2;
}
if (mflag && argc - optind > (MAXFCT - (16+1))*16)
errx(2, "too many input files for -m option");
for (i = optind; i < argc; i++) {
/* allow one occurrence of /dev/stdin */
if (!strcmp(argv[i], "-") || !strcmp(argv[i], devstdin)) {
if (stdinflag)
warnx("ignoring extra \"%s\" in file list",
argv[i]);
else {
stdinflag = 1;
argv[i] = devstdin;
}
} else if (ch = access(argv[i], R_OK))
err(2, "%s", argv[i]);
}
if (!(fldtab->flags & (I|D) || fldtab[1].icol.num)) {
SINGL_FLD = 1;
fldtab[0].icol.num = 1;
} else {
if (!fldtab[1].icol.num) {
fldtab[0].flags &= ~(BI|BT);
setfield("1", ++ftpos, fldtab->flags);
}
if (nflag)
fldtab[1].flags |= fldtab->flags;
fldreset(fldtab);
fldtab[0].flags &= ~F;
}
settables(fldtab[0].flags);
num_init();
fldtab->weights = gweights;
if (optind == argc)
argv[--optind] = devstdin;
filelist.names = argv+optind;
if (SINGL_FLD)
get = makeline;
else
get = makekey;
if (cflag) {
order(filelist, get, fldtab);
/* NOT REACHED */
}
if (!outpath) {
(void)snprintf(toutpath,
sizeof(toutpath), "%sstdout", _PATH_DEV);
outfile = outpath = toutpath;
} else if (!(ch = access(outpath, 0)) &&
strncmp(_PATH_DEV, outpath, 5)) {
struct sigaction act = {0, SIG_BLOCK, 6};
int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, SIGXFSZ,
SIGVTALRM, SIGPROF, 0};
errno = 0;
if (access(outpath, W_OK))
err(2, "%s", outpath);
act.sa_handler = cleanup;
(void)snprintf(toutpath, sizeof(toutpath), "%sXXXX", outpath);
outfile = mktemp(toutpath);
if (!outfile)
err(2, "%s", toutpath);
(void)atexit(cleanup);
for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
sigaction(sigtable[i], &act, 0);
} else outfile = outpath;
if (!(outfd = fopen(outfile, "w")))
err(2, "%s", outfile);
if (mflag)
fmerge(-1, filelist, argc-optind, get, outfd, putline, fldtab);
else
fsort(-1, 0, filelist, argc-optind, outfd, fldtab);
if (outfile != outpath) {
if (access(outfile, 0))
err(2, "%s", outfile);
(void)unlink(outpath);
if (link(outfile, outpath))
err(2, "cannot link %s: output left in %s",
outpath, outfile);
(void)unlink(outfile);
}
exit(0);
}
/*
 * Signal handler: remove the temporary output file and terminate.
 * Uses _exit() rather than exit(): exit() is not async-signal-safe and
 * would run the atexit handlers and stdio flushing from signal context.
 */
static void
onsig(s)
	int s;
{
	(void)s;
	cleanup();
	_exit(2);	/* return 2 on error/interrupt */
}
/*
 * Remove the file named by toutpath, if a name has been stored there.
 */
static void
cleanup()
{
	if (toutpath[0] == '\0')
		return;
	(void)unlink(toutpath);
}
/*
 * Print an optional diagnostic followed by the usage summary on stderr,
 * then exit with status 2.  A null or empty msg prints no diagnostic line.
 */
static void
usage(msg)
	char *msg;
{
	if (msg != NULL && *msg != '\0')
		(void)fprintf(stderr, "sort: %s\n", msg);
	(void)fprintf(stderr,
	    "usage: sort [-o output] [-cmubdfinr] [-t char] ");
	(void)fprintf(stderr, "[-T char] [-k keydef] ... [files]\n");
	exit(2);
}

142
usr.bin/sort/sort.h Normal file
View File

@ -0,0 +1,142 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)sort.h 8.1 (Berkeley) 6/6/93
*/
#include <sys/param.h>
#include <db.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#define NBINS 256 /* size of the per-byte tables (weights, masks, d_mask) */
#define MAXMERGE 16 /* NOTE(review): looks like the merge fan-in; confirm */
/* values for masks, weights, and other flags. */
#define I 1 /* mask out non-printable characters */
#define D 2 /* sort alphanumeric characters only */
#define N 4 /* Field is a number */
#define F 8 /* weight lower and upper case the same */
#define R 16 /* Field is reversed with respect to the global weight */
#define BI 32 /* ignore blanks in icol */
#define BT 64 /* ignore blanks in tcol */
/* masks for delimiters: blanks, fields, and termination. */
#define BLANK 1 /* ' ', '\t'; '\n' if -T is invoked */
#define FLD_D 2 /* ' ', '\t' default; from -t otherwise */
#define REC_D_F 4 /* '\n' default; from -T otherwise */
#define ND 10 /* limit on number of -k options. */
/* Both macros evaluate their arguments twice: avoid side effects. */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
/*
 * Close a stdio stream; exit with status 2 if the close fails.
 * The diagnostic names the operation: `file` is a FILE *, not a string,
 * so it must not be handed to a %s conversion.
 */
#define FCLOSE(file) { \
if (EOF == fclose(file)) \
err(2, "fclose"); \
}
/*
 * Write n items of `size` bytes to f; exit with status 2 unless every item
 * was written.  A short write is an error, and a zero-item write is not.
 * n is evaluated exactly once.
 */
#define EWRITE(ptr, size, n, f) { \
size_t ewrite_cnt_ = (n); \
if (fwrite(ptr, size, ewrite_cnt_, f) != ewrite_cnt_) \
err(2, NULL); \
}
/* length of record is currently limited to 2^16 - 1:
 * length_t is the type of the length and offset members of the record headers.
 */
typedef u_short length_t;
/* Round n up to the next multiple of 2; n is parenthesized so that
 * low-precedence argument expressions expand correctly. */
#define SALIGN(n) (((n)+1) & ~1)
/* a record is a key/line pair starting at rec.data. It has a total length
 * and an offset to the start of the line half of the pair.
 * data[1] only marks the start of the bytes; a record is stored in a buffer
 * large enough to hold `length` bytes of data after the header.
 */
typedef struct recheader {
length_t length;	/* total number of data bytes (key plus line) */
length_t offset;	/* index in data[] where the line half begins */
u_char data[1];	/* first data byte; the rest follow in place */
} RECHEADER;
/* The fixed members of a recheader without the data bytes; used where only
 * the size of the header is needed (e.g. sizeof(TRECHEADER)).
 */
typedef struct trecheader {
length_t length;
length_t offset;
} TRECHEADER;
/* This is the column as seen by struct field. It is used by enterfield.
* They are matched with corresponding coldescs during initialization.
*/
struct column {
struct coldesc *p;
int num;
int indent;
};
/* a coldesc has a number and pointers to the beginning and end of the
 * corresponding column in the current line. This is determined in enterkey.
 */
typedef struct coldesc {
u_char *start;	/* beginning of the column in the current line */
u_char *end;	/* end of the column in the current line */
int num;	/* column number */
} COLDESC;
/* A field has an initial and final column; an omitted final column
 * implies the end of the line. Flags regulate omission of blanks and
 * numerical sorts; mask determines which characters are ignored (from -i, -d);
 * weights determines the sort weights of a character (from -f, -r).
 */
struct field {
struct column icol;	/* initial column of the key */
struct column tcol;	/* final column of the key */
u_int flags;	/* combination of I, D, N, F, R, BI, BT */
u_char *mask;	/* table of ignored characters */
u_char *weights;	/* table of per-character sort weights */
};
/* Identifies a set of input files in one of two ways, depending on the caller. */
union f_handle {
int top;	/* index of the first entry in the temporary-file stack */
char **names;	/* vector of input file names (taken from argv) */
};
extern int PANIC; /* maximum depth of fsort before fmerge is called */
/* Character weight tables. */
extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
/* Masks of ignored characters. */
extern u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
/* Per-character delimiter flags: BLANK, FLD_D, REC_D_F. */
extern u_char d_mask[NBINS];
extern int SINGL_FLD, SEP_FLAG, UNIQUE;
extern int REC_D; /* record delimiter character */
#include "extern.h"

87
usr.bin/sort/tmp.c Normal file
View File

@ -0,0 +1,87 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
static char sccsid[] = "@(#)tmp.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
#include <sys/param.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "pathnames.h"
#define _NAME_TMP "sort.XXXXXXXX"
/*
 * Create an anonymous temporary file and return it open for update ("w+").
 *
 * The file is created in $TMPDIR when that is set (looked up once), and
 * from the _PATH_SORTTMP template otherwise.  It is created atomically with
 * mkstemp() and unlinked immediately, so it disappears when closed.  All
 * signals are blocked between creation and unlink so that an interrupt
 * cannot leave the file behind.
 *
 * Exits through err(2, ...) if the file cannot be created or opened.
 */
FILE *
ftmp()
{
	static char *envtmp;
	static int first = 0;
	sigset_t set, oset;
	FILE *fd;
	int tfd, sverrno;
	char pathb[_POSIX_PATH_MAX], *path;

	path = pathb;
	if (!first && !envtmp) {
		envtmp = getenv("TMPDIR");
		first = 1;
	}
	if (envtmp)
		(void)snprintf(path,
		    sizeof(pathb), "%s/%s", envtmp, _NAME_TMP);
	else {
		memmove(path, _PATH_SORTTMP, sizeof(_PATH_SORTTMP));
	}
	sigfillset(&set);
	(void)sigprocmask(SIG_BLOCK, &set, &oset);
	if ((tfd = mkstemp(path)) == -1)
		err(2, "%s", path);
	if (!(fd = fdopen(tfd, "w+"))) {
		/* do not leave the file behind; report the fdopen error */
		sverrno = errno;
		(void)unlink(path);
		errno = sverrno;
		err(2, "%s", path);
	}
	(void)unlink(path);
	(void)sigprocmask(SIG_SETMASK, &oset, NULL);
	return (fd);
}