4.4BSD-Lite2 contrib/sort
This commit is contained in:
parent
5faa7161ce
commit
1d5d9b5b60
6
usr.bin/sort/Makefile
Normal file
6
usr.bin/sort/Makefile
Normal file
@ -0,0 +1,6 @@
|
||||
# @(#)Makefile 8.1 (Berkeley) 6/6/93
|
||||
|
||||
PROG= sort
|
||||
SRCS= append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c
|
||||
|
||||
.include <bsd.prog.mk>
|
893
usr.bin/sort/TEST/stests
Normal file
893
usr.bin/sort/TEST/stests
Normal file
@ -0,0 +1,893 @@
|
||||
# @(#)stests 8.1 (Berkeley) 6/6/93
|
||||
|
||||
#Latest version. My sort passes all tests because I wrote it.
|
||||
#We differ only on 25E and 25H.
|
||||
#(I found at least one bug in constructing test 25, and was driven
|
||||
#to rewrite field parsing to clarify it.)
|
||||
#
|
||||
#In 25E, -k2.3,2.1b, the fields are not necessarily out of order.
|
||||
#Even if they were, it would be legal (11752-3), although certainly
|
||||
#justification for warning.
|
||||
#
|
||||
#On 25H, your answer is as defensible as mine. (Our suggestion
|
||||
#*1 backs mine.)
|
||||
|
||||
|
||||
# Tests for the Unix sort utility
|
||||
# Test Posix features except for locale.
|
||||
# Test some nonstandard features if present.
|
||||
|
||||
# Other tests should be made for files too big to fit in memory.
|
||||
|
||||
|
||||
# Initialize switches for nonstandard features.
|
||||
# Use parenthesized settings for supported features.
|
||||
|
||||
o=: # officially obsolescent features: +1 -2, misplaced -o (o=)
|
||||
g=: # -g numeric sort including e-format numbers (g=)
|
||||
M=: # -M sort by month names (M=)
|
||||
s=: # -s stable, do not compare raw bytes on equal keys (s=)
|
||||
y= # -y user-specified memory size (y=-y10000)
|
||||
|
||||
# Detect what features are supported, assuming bad options cause
|
||||
# errors. Set switches accordingly.
|
||||
|
||||
echo obsolescent and nonstandard features recognized, if any:
|
||||
if sort +0 </dev/null 2>/dev/null; then o=
|
||||
echo ' +1 -2'; fi
|
||||
if sort /dev/null -o xx 2>/dev/null; then o=
|
||||
echo ' displaced -o'; fi
|
||||
if sort -g </dev/null 2>/dev/null; then g=
|
||||
echo ' -g g-format numbers'; fi
|
||||
if sort -M </dev/null 2>/dev/null; then M=
|
||||
echo ' -M months'; fi
|
||||
if sort -s </dev/null 2>/dev/null; then s=
|
||||
echo ' -s stable'; fi
|
||||
if sort -y10000 </dev/null 2>/dev/null; then y=-y10000
|
||||
echo ' -y space'; fi
|
||||
if sort -z10000 </dev/null 2>/dev/null; then
|
||||
echo ' -z size (not exercised)'; fi
|
||||
if sort -T. </dev/null 2>/dev/null; then
|
||||
echo ' -T tempdir (not exercised)'; fi
|
||||
|
||||
|
||||
export TEST # major sequence number of test
|
||||
|
||||
trap "rm -f in in1 out xx -k xsort linecount fields; exit" 0 1 2 13 15
|
||||
|
||||
# xsort testno options
|
||||
# Sort file "in" with specified options.
|
||||
# Compare with file "out" if that is supplied,
|
||||
# otherwise make plausibility checks on output
|
||||
|
||||
# "sum" must be dumb; insensitive to the
|
||||
# order of lines within a file.
|
||||
# System V sum is suitable; sum -5 is the v10 equivalent.
|
||||
|
||||
PATH=.:$PATH
|
||||
export PATH
|
||||
cat <<'!' >xsort; chmod +x xsort
|
||||
|
||||
X=$1; shift
|
||||
|
||||
if sort "$@" in >xx && sort -c "$@" xx
|
||||
then
|
||||
if test -f out
|
||||
then
|
||||
cmp xx out >/dev/null && exit 0
|
||||
echo $TEST$X comparison failed
|
||||
else
|
||||
test "`cksum -o2 <in`" = "`cksum -o2 <xx`" && exit 0
|
||||
echo $TEST$X checksum failed
|
||||
fi
|
||||
else
|
||||
echo $TEST$X failed
|
||||
fi
|
||||
exit 1
|
||||
!
|
||||
|
||||
# linecount testno file count
|
||||
# declares the given "testno" to be in error if number of
|
||||
# lines in "file" differs from "count"
|
||||
|
||||
cat <<'!' >linecount; chmod +x linecount
|
||||
awk 'END{ if(NR!='$3') print "'$TEST$1' failed" }' $2
|
||||
!
|
||||
|
||||
rm -f out
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=01; echo $TEST # -c status, checksum
|
||||
# obsolescent features go together
|
||||
cat <<! >in
|
||||
b
|
||||
a
|
||||
!
|
||||
rm -f out -o
|
||||
|
||||
sort -c in 2>/dev/null && echo ${TEST}A failed
|
||||
|
||||
xsort B || '"cksum"' is probably unsuitable - see comments
|
||||
|
||||
$o sort +0 in -o in || echo ${TEST}c failed
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=02; echo $TEST # output from -c
|
||||
cat <<! >in
|
||||
x
|
||||
y
|
||||
!
|
||||
|
||||
sort -cr in >out 2>xx && echo ${TEST}A failed
|
||||
test -s out && echo ${TEST}B failed
|
||||
test -s xx && echo option -c is noisy "(probably legal)"
|
||||
test -s xx || echo option -c is quiet "(legal, not classical)"
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=03; echo $TEST # -n
|
||||
cat <<! >in
|
||||
-99.0
|
||||
-99.1
|
||||
-.0002
|
||||
-10
|
||||
2
|
||||
0010.000000000000000000000000000000000001
|
||||
10
|
||||
3x
|
||||
x
|
||||
!
|
||||
cat <<! >out
|
||||
-99.1
|
||||
-99.0
|
||||
-10
|
||||
-.0002
|
||||
x
|
||||
2
|
||||
3x
|
||||
10
|
||||
0010.000000000000000000000000000000000001
|
||||
!
|
||||
|
||||
xsort "" -n
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=04; echo $TEST # -b without fields, piping, -c status return
|
||||
cat <<! >in
|
||||
b
|
||||
a
|
||||
!
|
||||
cp in out
|
||||
|
||||
xsort A -b
|
||||
|
||||
cat in | sort | cat >xx
|
||||
cmp xx out >/dev/null || echo ${TEST}B failed
|
||||
|
||||
sort in | sort -cr 2>/dev/null && echo ${TEST}C failed
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=05; echo $TEST # fields, reverse fields, -c status return
|
||||
cat <<! >in
|
||||
b b p
|
||||
a b q
|
||||
x a
|
||||
!
|
||||
cat <<! >out
|
||||
x a
|
||||
a b q
|
||||
b b p
|
||||
!
|
||||
|
||||
$o xsort A +1 -2
|
||||
|
||||
$o xsort B +1 -2 +2r
|
||||
|
||||
xsort C -k 2,2
|
||||
|
||||
xsort D -k 2,2 -k 3r
|
||||
|
||||
xsort E -k 2,2.0
|
||||
|
||||
xsort F -k 2,2 -k 1,1 -k 3
|
||||
|
||||
sort -c -k 2 in 2>/dev/null && ${TEST}G failed
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=06; echo $TEST # -t
|
||||
cat <<! >in
|
||||
a:
|
||||
a!
|
||||
!
|
||||
cp in out
|
||||
|
||||
$o xsort A -t : -r +0
|
||||
|
||||
$o xsort B -t : +0 -1
|
||||
|
||||
xsort C -t : -r -k 1
|
||||
|
||||
xsort D -t : -k 1,1
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=07; echo $TEST # -t, character positions in fields
|
||||
# -t: as 1 arg is not strictly conforming, but classical
|
||||
cat <<! >in
|
||||
: ab
|
||||
:bac
|
||||
!
|
||||
cat <<! >out
|
||||
:bac
|
||||
: ab
|
||||
!
|
||||
|
||||
$o xsort A -b -t: +1.1
|
||||
|
||||
$o xsort B -t: +1.1r
|
||||
|
||||
xsort C -b -t: -k 2.2
|
||||
|
||||
xsort D -t: -k 2.2r
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=08; echo $TEST # space and tab as -t characters
|
||||
cat <<! >in
|
||||
b c
|
||||
b c
|
||||
b c
|
||||
!
|
||||
cp in out
|
||||
|
||||
xsort A -t ' ' -k2,2
|
||||
|
||||
xsort B -t ' ' -k2.1,2.0
|
||||
|
||||
cat <<! >out
|
||||
b c
|
||||
b c
|
||||
b c
|
||||
!
|
||||
|
||||
xsort C -t ' ' -k2,2
|
||||
|
||||
xsort D -t ' ' -k2.1,2.0
|
||||
|
||||
cat <<! >out
|
||||
b c
|
||||
b c
|
||||
b c
|
||||
!
|
||||
|
||||
xsort E -k2
|
||||
|
||||
cat <<! >out
|
||||
b c
|
||||
b c
|
||||
b c
|
||||
!
|
||||
|
||||
xsort F -k2b
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=09; echo $TEST # alphabetic as -t character
|
||||
cat <<! >in
|
||||
zXa
|
||||
yXa
|
||||
zXb
|
||||
!
|
||||
cp in out
|
||||
|
||||
xsort "" -tX -k2 -k1r,1
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=10; echo $TEST # -m
|
||||
cat <<! >in
|
||||
a
|
||||
ab
|
||||
ab
|
||||
bc
|
||||
ca
|
||||
!
|
||||
cat <<! >in1
|
||||
Z
|
||||
a
|
||||
aa
|
||||
ac
|
||||
c
|
||||
!
|
||||
cat <<! >out
|
||||
Z
|
||||
a
|
||||
a
|
||||
aa
|
||||
ab
|
||||
ab
|
||||
ac
|
||||
bc
|
||||
c
|
||||
ca
|
||||
!
|
||||
|
||||
sort -m in in1 >xx
|
||||
cmp xx out >/dev/null || echo $TEST failed
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=11; echo $TEST # multiple files, -o overwites input, -m, -mu
|
||||
cat <<! >in
|
||||
a
|
||||
b
|
||||
c
|
||||
d
|
||||
!
|
||||
|
||||
sort -o xx in in in in in in in in in in in in in in in in in
|
||||
linecount A xx 68
|
||||
sort -o in -mu in in in in in in in in in in in in in in in in in
|
||||
linecount B in 4
|
||||
sort -o in -m in in in in in in in in in in in in in in in in in
|
||||
|
||||
cmp in xx >/dev/null || echo ${TEST}C failed
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=12; echo $TEST # does -mu pick the first among equals?
|
||||
cat <<! >in
|
||||
3B
|
||||
3b
|
||||
3B2
|
||||
~3B2
|
||||
4.1
|
||||
41
|
||||
5
|
||||
5.
|
||||
!
|
||||
cat <<! >out
|
||||
3B
|
||||
3B2
|
||||
4.1
|
||||
5
|
||||
!
|
||||
|
||||
xsort A -mudf || echo "(other behavior is legal, not classical)"
|
||||
|
||||
xsort B -mudf -k1 || echo "(other behavior is legal, not classical)"
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=13; echo $TEST # long records (>8000 bytes, keys >16000), -r
|
||||
awk '
|
||||
BEGIN { x="x"
|
||||
for(i=1; i<=12; i++) x = x x
|
||||
for(i=15; i<=25; i++) print x i
|
||||
}' >in
|
||||
awk '
|
||||
BEGIN { x="x"
|
||||
for(i=1; i<=12; i++) x = x x
|
||||
for(i=25; i>=15; i--) print x i
|
||||
}' >out
|
||||
|
||||
xsort A -r
|
||||
|
||||
xsort B -k 1,1r -k 1
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=14; echo $TEST "(3 long parts)"
|
||||
awk 'BEGIN { for(i=0; i<100000; i++) print rand() }' | grep -v e >in
|
||||
rm -f out
|
||||
|
||||
xsort A; echo $TEST "(part A done)"
|
||||
|
||||
xsort B -n; echo $TEST "(part B done)"
|
||||
|
||||
# next test is unclean: xx is a hidden side-effect of xsort
|
||||
|
||||
awk '
|
||||
$0 < x { print "test '${TEST}C' failed"; exit }
|
||||
$0 "" != x { print >"out"; x = $0 }
|
||||
' xx
|
||||
|
||||
xsort C -n -u
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=15; echo $TEST "(long)" # force intermediate files if possible
|
||||
awk 'BEGIN { for(i=0; i<20000; i++) print rand() }' >in
|
||||
rm -f out
|
||||
|
||||
xsort A -r $y
|
||||
|
||||
sort -r in | awk '$0 "x" != x { print ; x = $0 "x" }' >out
|
||||
|
||||
xsort B -u -r $y
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=16; echo $TEST # -nr, -nm, file name -
|
||||
awk 'BEGIN { for(i=-100; i<=100; i+=2) printf "%.10d\n", i }' >in
|
||||
|
||||
awk 'BEGIN { for(i=-99; i<=100; i+=2) print i }' | sort -nr in - >xx
|
||||
awk '$0+0 != 101-NR { print "'${TEST}A' failed"; exit }' xx
|
||||
|
||||
awk 'BEGIN { for(i=-99; i<=100; i+=2) print i }' | sort -mn in - >xx
|
||||
awk '$0+0 != -101+NR { print "'${TEST}B' failed"; exit }' xx
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=17; echo $TEST # -d, fields without end, modifier override
|
||||
cat <<! >in
|
||||
a-B
|
||||
a+b
|
||||
a b
|
||||
A+b
|
||||
a b
|
||||
!
|
||||
cat <<! >out
|
||||
a b
|
||||
a b
|
||||
A+b
|
||||
a-B
|
||||
a+b
|
||||
!
|
||||
|
||||
$o xsort A -df +0 +0d
|
||||
|
||||
xsort B -df -k 1 -k 1d
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=18; echo $TEST # -u on key only
|
||||
cat <<! >in
|
||||
12 y
|
||||
13 z
|
||||
12 x
|
||||
!
|
||||
cat <<! >out
|
||||
12 x
|
||||
12 y
|
||||
13 z
|
||||
!
|
||||
|
||||
$o xsort A +0 -1
|
||||
|
||||
xsort B -k 1,1
|
||||
|
||||
sort -u -k 1,1 in >xx
|
||||
linecount C xx 2
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=19; echo $TEST # -i, -d, -f
|
||||
cat <<! >xx.c
|
||||
run(i,j){ for( ; i<=j; i++) printf("%.3o %c\n",i,i); }
|
||||
main(){ run(0, 011); /* 012=='\n' */
|
||||
run(013, 0377); }
|
||||
!
|
||||
cc xx.c
|
||||
a.out >in
|
||||
cat <<! >xx.c
|
||||
run(i,j){ for( ; i<=j; i++) printf("%.3o %c\n",i,i); }
|
||||
main(){ run(0, 011);
|
||||
run(013, ' '-1);
|
||||
run(0177, 0377);
|
||||
run(' ', 0176); }
|
||||
!
|
||||
cc xx.c
|
||||
a.out >out
|
||||
|
||||
xsort A -i -k 2
|
||||
|
||||
cat <<! >xx.c
|
||||
run(i,j){ for( ; i<=j; i++) printf("%.3o %c\n",i,i); }
|
||||
main(){ run(0, 010); /* 011=='\t', 012=='\n' */
|
||||
run(013, ' '-1);
|
||||
run(' '+1, '0'-1);
|
||||
run('9'+1, 'A'-1);
|
||||
run('Z'+1, 'a'-1);
|
||||
run('z'+1, 0377);
|
||||
run('\t', '\t');
|
||||
run(' ', ' ');
|
||||
run('0', '9');
|
||||
run('A', 'Z');
|
||||
run('a', 'z'); }
|
||||
!
|
||||
cc xx.c
|
||||
a.out >out
|
||||
|
||||
xsort B -d -k 2
|
||||
|
||||
cat <<! >xx.c
|
||||
run(i,j){ for( ; i<=j; i++) printf("%.3o %c\n",i,i); }
|
||||
main(){ int i;
|
||||
run(0, 011);
|
||||
run(013, 'A'-1);
|
||||
for(i='A'; i<='Z'; i++)
|
||||
printf("%.3o %c\n%.3o %c\n",i,i,i+040,i+040);
|
||||
run('Z'+1, 'a'-1);
|
||||
run('z'+1, 0377); }
|
||||
!
|
||||
cc xx.c
|
||||
a.out >out
|
||||
rm xx.c
|
||||
|
||||
xsort C -f -k 2
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=20; echo $TEST # -d, -f, -b applies only to fields
|
||||
cat <<! >in
|
||||
b
|
||||
'C
|
||||
a
|
||||
!
|
||||
cp in out
|
||||
|
||||
xsort A -d
|
||||
|
||||
xsort B -f
|
||||
|
||||
cat <<! >out
|
||||
b
|
||||
a
|
||||
'C
|
||||
!
|
||||
|
||||
xsort C -dfb
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=21; echo $TEST # behavior of null bytes
|
||||
cat <<'!' >xx.c
|
||||
main() { printf("%cb\n%ca\n",0,0); }
|
||||
!
|
||||
cc xx.c
|
||||
a.out >in
|
||||
sort in >xx
|
||||
cmp in xx >/dev/null && echo ${TEST}A failed
|
||||
test "`wc -c <in`" = "`wc -c <xx`" || echo ${TEST}B failed
|
||||
rm xx.c a.out
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=22; echo $TEST # field limits
|
||||
cat <<! >in
|
||||
a 2
|
||||
a 1
|
||||
b 2
|
||||
b 1
|
||||
!
|
||||
cat <<! >out
|
||||
b 1
|
||||
b 2
|
||||
a 1
|
||||
a 2
|
||||
!
|
||||
|
||||
xsort "" -r -k1,1 -k2n
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=23; echo $TEST # empty file
|
||||
|
||||
sort -o xx </dev/null
|
||||
cmp xx /dev/null 2>/dev/null || echo ${TEST}A failed
|
||||
|
||||
sort -c </dev/null || echo ${TEST}B failed
|
||||
|
||||
sort -cu </dev/null || echo ${TEST}C failed
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=24; echo $TEST # many fields
|
||||
cat <<! >in
|
||||
0:2:3:4:5:6:7:8:9
|
||||
1:1:3:4:5:6:7:8:9
|
||||
1:2:2:4:5:6:7:8:9
|
||||
1:2:3:3:5:6:7:8:9
|
||||
1:2:3:4:4:6:7:8:9
|
||||
1:2:3:4:5:5:7:8:9
|
||||
1:2:3:4:5:6:6:8:9
|
||||
1:2:3:4:5:6:7:7:9
|
||||
1:2:3:4:5:6:7:8:8
|
||||
!
|
||||
cat <<! >out
|
||||
1:2:3:4:5:6:7:8:8
|
||||
1:2:3:4:5:6:7:7:9
|
||||
1:2:3:4:5:6:6:8:9
|
||||
1:2:3:4:5:5:7:8:9
|
||||
1:2:3:4:4:6:7:8:9
|
||||
1:2:3:3:5:6:7:8:9
|
||||
1:2:2:4:5:6:7:8:9
|
||||
1:1:3:4:5:6:7:8:9
|
||||
0:2:3:4:5:6:7:8:9
|
||||
!
|
||||
|
||||
xsort "" -t: -k9 -k8 -k7 -k6 -k5 -k4 -k3 -k2 -k1
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=25; echo $TEST # variously specified alpha fields
|
||||
# numbers give the correct orderings
|
||||
cat <<! >in
|
||||
01:04:19:01:16:01:21:01 a
|
||||
02:03:13:15:13:19:15:02 a
|
||||
03:02:07:09:07:13:09:03 a
|
||||
04:01:01:03:01:07:03:04 a
|
||||
05:08:20:16:17:02:20:05 aa
|
||||
06:07:14:18:14:20:14:06 aa
|
||||
07:06:08:10:08:14:08:07 aa
|
||||
08:05:02:04:02:08:02:08 aa
|
||||
09:16:22:02:22:04:24:13 b
|
||||
10:15:16:20:19:22:18:14 b
|
||||
11:14:10:12:10:16:12:15 b
|
||||
12:13:04:06:04:10:06:16 b
|
||||
13:24:24:22:24:06:22:21 bb
|
||||
14:23:18:24:21:24:16:22 bb
|
||||
15:22:12:14:12:18:10:23 bb
|
||||
16:21:06:08:06:12:04:24 bb
|
||||
17:12:21:21:18:03:19:09 ab
|
||||
18:11:15:19:15:21:13:10 ab
|
||||
19:10:09:11:09:15:07:11 ab
|
||||
20:09:03:05:03:09:01:12 ab
|
||||
21:20:23:17:23:05:23:17 ba
|
||||
22:19:17:23:20:23:17:18 ba
|
||||
23:18:11:13:11:17:11:19 ba
|
||||
24:17:05:07:05:11:05:20 ba
|
||||
!
|
||||
sort -k2b -k2 in >xx &&
|
||||
sort -c -t: -k2n xx 2>/dev/null || echo ${TEST}A failed
|
||||
sort -k2,2.1b -k2 in >xx &&
|
||||
sort -c -t: -k3n xx 2>/dev/null || echo ${TEST}B failed
|
||||
sort -k2.3 -k2 in >xx &&
|
||||
sort -c -t: -k4n xx 2>/dev/null || echo ${TEST}C failed
|
||||
sort -k2b,2.3 -k2 in >xx &&
|
||||
sort -c -t: -k5n xx 2>/dev/null || echo ${TEST}D failed
|
||||
sort -k2.3,2.1b -k2 in >xx &&
|
||||
sort -c -t: -k6n xx 2>/dev/null || echo ${TEST}E failed
|
||||
sort -k2,2.1b -k2r in >xx &&
|
||||
sort -c -t: -k7n xx 2>/dev/null || echo ${TEST}F failed
|
||||
sort -b -k2,2 -k2 in >xx &&
|
||||
sort -c -t: -k8n xx 2>/dev/null || echo ${TEST}G failed
|
||||
sort -b -k2,2b -k2 in >xx && # perhaps same as G
|
||||
sort -c -t: -k3n xx 2>/dev/null || echo ${TEST}H failed\
|
||||
"(standard is not clear on this)"
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=26; echo $TEST # empty fields, out of bounds fields
|
||||
cat <<! >in
|
||||
0 5
|
||||
1 4
|
||||
2 3
|
||||
3 2
|
||||
4 1
|
||||
5 0
|
||||
!
|
||||
cp in out
|
||||
|
||||
xsort "" -k2.2,2.1 -k2.3,2.4
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=27; echo $TEST # displaced -o
|
||||
rm -f out
|
||||
|
||||
$o sort /dev/null -o out || $o echo ${TEST}B failed
|
||||
$o test -f out || $o echo ${TEST}C failed
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=28; echo $TEST # apparently nonmonotone field specs
|
||||
cat <<! >in
|
||||
aaaa c
|
||||
x a
|
||||
0 b
|
||||
!
|
||||
cp in out
|
||||
|
||||
$o xsort A +1 -0.3 +1.4 -1.5
|
||||
|
||||
xsort B -k2,1.3 -k2.5,2.5
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=29; echo $TEST # determination of end of option list
|
||||
cat >-k <<!
|
||||
x
|
||||
!
|
||||
rm -f out -c
|
||||
|
||||
sort -- -k </dev/null >xx || echo ${TEST}A argument failed
|
||||
cmp xx -k || echo ${TEST}A comparison failed
|
||||
|
||||
sort - -c </dev/null 2>/dev/null && echo ${TEST}B failed
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=30; echo $TEST # missing newline
|
||||
awk 'BEGIN{ printf "%s", "x"}' | sort >xx
|
||||
wc -c <xx | awk '$1!=2{ print "'${TEST}' failed" }'
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=31; echo $TEST # -M, multiple fields
|
||||
cat <<! >in
|
||||
jan 10 1900
|
||||
Feb 26 1900
|
||||
feb 25 1900
|
||||
January xx 1900
|
||||
August 11 1900
|
||||
jan 15 1990
|
||||
feb 22 1990
|
||||
mar 15 1990
|
||||
apr 1 1990
|
||||
may 45 1990
|
||||
jun 14 1990
|
||||
jul 4 1990
|
||||
aug 1~ 1990
|
||||
aug 11 1990
|
||||
sep 1 1990
|
||||
oct 12 1990
|
||||
nov 24 1990
|
||||
dec 25 1990
|
||||
never 3 1990
|
||||
Dec 25 1990
|
||||
!
|
||||
cat <<! >out
|
||||
January xx 1900
|
||||
jan 10 1900
|
||||
feb 25 1900
|
||||
Feb 26 1900
|
||||
August 11 1900
|
||||
never 3 1990
|
||||
jan 15 1990
|
||||
feb 22 1990
|
||||
mar 15 1990
|
||||
apr 1 1990
|
||||
may 45 1990
|
||||
jun 14 1990
|
||||
jul 4 1990
|
||||
aug 1~ 1990
|
||||
aug 11 1990
|
||||
sep 1 1990
|
||||
oct 12 1990
|
||||
nov 24 1990
|
||||
Dec 25 1990
|
||||
dec 25 1990
|
||||
!
|
||||
|
||||
$M xsort "" -k3n -k1M -k2n
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=32; echo $TEST # -M case insensitivity, -r
|
||||
cat <<! >in
|
||||
x
|
||||
june
|
||||
january
|
||||
december
|
||||
!
|
||||
cat <<! >out
|
||||
december
|
||||
june
|
||||
january
|
||||
x
|
||||
!
|
||||
|
||||
$M xsort "" -Mr
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=33; echo $TEST # -g
|
||||
cat <<! >in
|
||||
2
|
||||
1
|
||||
10
|
||||
.2
|
||||
1e
|
||||
1E1
|
||||
1e.
|
||||
!
|
||||
cat <<! >out
|
||||
.2
|
||||
1
|
||||
1e
|
||||
1e.
|
||||
2
|
||||
10
|
||||
1E1
|
||||
!
|
||||
|
||||
$g xsort "" -g
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=34; echo $TEST # -g wide operands
|
||||
cat <<! >in
|
||||
.99999999999999999999
|
||||
099999999999999999999e-21
|
||||
099999999999999999999e-19
|
||||
.1e1
|
||||
!
|
||||
cat <<! >out
|
||||
099999999999999999999e-21
|
||||
.99999999999999999999
|
||||
.1e1
|
||||
099999999999999999999e-19
|
||||
!
|
||||
|
||||
$g xsort A -g
|
||||
|
||||
cat <<! >out
|
||||
.1e1
|
||||
.99999999999999999999
|
||||
099999999999999999999e-19
|
||||
099999999999999999999e-21
|
||||
!
|
||||
|
||||
xsort B -n
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=35; echo $TEST #-g, -u with different fp reps
|
||||
cat <<! >in
|
||||
+0
|
||||
-0
|
||||
0.10
|
||||
+.1
|
||||
-.1
|
||||
-100e-3
|
||||
x
|
||||
!
|
||||
cat <<! >out
|
||||
-.1
|
||||
-100e-3
|
||||
+0
|
||||
-0
|
||||
x
|
||||
+.1
|
||||
0.10
|
||||
!
|
||||
|
||||
$g xsort A -g
|
||||
|
||||
$g sort -gu in >xx && $g sort -c -gu xx || echo ${TEST}B failed
|
||||
$g linecount C xx 3
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=36; echo $TEST # -s
|
||||
cat <<! >in
|
||||
a 2
|
||||
b 1
|
||||
c 2
|
||||
a 1
|
||||
b 2
|
||||
c 1
|
||||
!
|
||||
cat <<! >out
|
||||
a 2
|
||||
a 1
|
||||
b 1
|
||||
b 2
|
||||
c 2
|
||||
c 1
|
||||
!
|
||||
|
||||
$s xsort "" -s -k1,1
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=37; echo $TEST # -s, multiple files
|
||||
cat <<! >in
|
||||
a 2
|
||||
c 2
|
||||
!
|
||||
cat <<! >in1
|
||||
a 1
|
||||
b 1
|
||||
c 1
|
||||
!
|
||||
cat <<! >out
|
||||
c 2
|
||||
b 1
|
||||
a 2
|
||||
!
|
||||
|
||||
$s sort -smru -k1,1 in in in1 in1 >xx
|
||||
$s cmp xx out >/dev/null || echo $TEST failed
|
||||
|
||||
#---------------------------------------------------------------
|
||||
TEST=38; echo $TEST # -s
|
||||
$s awk '
|
||||
BEGIN {
|
||||
for(i=1; i<50; i++)
|
||||
for(j=1; j<=i; j++) {
|
||||
print i, 2 >"in"
|
||||
print i, 1 >"in1"
|
||||
}
|
||||
}'
|
||||
|
||||
$s sort -m -s -k1,1n in in1 >out
|
||||
|
||||
$s awk '
|
||||
func stop() { print "'$TEST' failed"; exit }
|
||||
$1!=last1 { if(count!=last1 || $2!=2) stop();
|
||||
count = 0}
|
||||
$1==last1 && $2!=last2 { if(count!=last1 || $2!=1) stop();
|
||||
count = 0 }
|
||||
{ count++; last1 = $1; last2 = $2 }
|
||||
' out
|
188
usr.bin/sort/append.c
Normal file
188
usr.bin/sort/append.c
Normal file
@ -0,0 +1,188 @@
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char sccsid[] = "@(#)append.c 8.1 (Berkeley) 6/6/93";
|
||||
#endif /* not lint */
|
||||
|
||||
#include "sort.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define OUTPUT { \
|
||||
if ((n = cpos - ppos) > 1) { \
|
||||
for (; ppos < cpos; ++ppos) \
|
||||
*ppos -= odepth; \
|
||||
ppos -= n; \
|
||||
radixsort(ppos, n, wts1, REC_D); \
|
||||
for (; ppos < cpos; ppos++) { \
|
||||
prec = (RECHEADER *) (*ppos - sizeof(TRECHEADER));\
|
||||
put(prec, fd); \
|
||||
} \
|
||||
} else put(prec, fd); \
|
||||
}
|
||||
|
||||
/*
|
||||
* copy sorted lines to output; check for uniqueness
|
||||
*/
|
||||
void
|
||||
append(keylist, nelem, depth, fd, put, ftbl)
|
||||
u_char **keylist;
|
||||
int nelem;
|
||||
register int depth;
|
||||
FILE *fd;
|
||||
void (*put)(RECHEADER *, FILE *);
|
||||
struct field *ftbl;
|
||||
{
|
||||
register u_char *wts, *wts1;
|
||||
register n, odepth;
|
||||
register u_char **cpos, **ppos, **lastkey;
|
||||
register u_char *cend, *pend, *start;
|
||||
register struct recheader *crec, *prec;
|
||||
|
||||
if (*keylist == '\0' && UNIQUE)
|
||||
return;
|
||||
wts1 = wts = ftbl[0].weights;
|
||||
if ((!UNIQUE) && SINGL_FLD) {
|
||||
if (ftbl[0].flags & F && ftbl[0].flags & R)
|
||||
wts1 = Rascii;
|
||||
else if (ftbl[0].flags & F)
|
||||
wts1 = ascii;
|
||||
odepth = depth;
|
||||
}
|
||||
lastkey = keylist + nelem;
|
||||
depth += sizeof(TRECHEADER);
|
||||
if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
|
||||
ppos = keylist;
|
||||
prec = (RECHEADER *) (*ppos - depth);
|
||||
if (UNIQUE)
|
||||
put(prec, fd);
|
||||
for (cpos = keylist+1; cpos < lastkey; cpos++) {
|
||||
crec = (RECHEADER *) (*cpos - depth);
|
||||
if (crec->length == prec->length) {
|
||||
pend = (u_char *) &prec->offset + prec->length;
|
||||
cend = (u_char *) &crec->offset + crec->length;
|
||||
for (start = *cpos; cend >= start; cend--) {
|
||||
if (wts[*cend] != wts[*pend])
|
||||
break;
|
||||
pend--;
|
||||
}
|
||||
if (pend + 1 != *ppos) {
|
||||
if (!UNIQUE) {
|
||||
OUTPUT;
|
||||
} else
|
||||
put(crec, fd);
|
||||
ppos = cpos;
|
||||
prec = crec;
|
||||
}
|
||||
} else {
|
||||
if (!UNIQUE) {
|
||||
OUTPUT;
|
||||
} else
|
||||
put(crec, fd);
|
||||
ppos = cpos;
|
||||
prec = crec;
|
||||
}
|
||||
}
|
||||
if (!UNIQUE) { OUTPUT; }
|
||||
} else if (UNIQUE) {
|
||||
ppos = keylist;
|
||||
prec = (RECHEADER *) (*ppos - depth);
|
||||
put(prec, fd);
|
||||
for (cpos = keylist+1; cpos < lastkey; cpos++) {
|
||||
crec = (RECHEADER *) (*cpos - depth);
|
||||
if (crec->offset == prec->offset) {
|
||||
pend = (u_char *) &prec->offset + prec->offset;
|
||||
cend = (u_char *) &crec->offset + crec->offset;
|
||||
for (start = *cpos; cend >= start; cend--) {
|
||||
if (wts[*cend] != wts[*pend])
|
||||
break;
|
||||
pend--;
|
||||
}
|
||||
if (pend + 1 != *ppos) {
|
||||
ppos = cpos;
|
||||
prec = crec;
|
||||
put(prec, fd);
|
||||
}
|
||||
} else {
|
||||
ppos = cpos;
|
||||
prec = crec;
|
||||
put(prec, fd);
|
||||
}
|
||||
}
|
||||
} else for (cpos = keylist; cpos < lastkey; cpos++) {
|
||||
crec = (RECHEADER *) (*cpos - depth);
|
||||
put(crec, fd);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* output the already sorted eol bin.
|
||||
*/
|
||||
void
|
||||
rd_append(binno, infl0, nfiles, outfd, buffer, bufend)
|
||||
u_char *buffer, *bufend;
|
||||
int binno, nfiles;
|
||||
union f_handle infl0;
|
||||
FILE *outfd;
|
||||
{
|
||||
struct recheader *rec;
|
||||
rec = (RECHEADER *) buffer;
|
||||
if (!getnext(binno, infl0, nfiles, (RECHEADER *) buffer, bufend, 0)) {
|
||||
putline(rec, outfd);
|
||||
while (getnext(binno, infl0, nfiles, (RECHEADER *) buffer,
|
||||
bufend, 0) == 0) {
|
||||
if (!UNIQUE)
|
||||
putline(rec, outfd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* append plain text--used after sorting the biggest bin.
|
||||
*/
|
||||
void
|
||||
concat(a, b)
|
||||
FILE *a, *b;
|
||||
{
|
||||
int nread;
|
||||
char buffer[4096];
|
||||
|
||||
rewind(b);
|
||||
while ((nread = fread(buffer, 1, 4096, b)) > 0)
|
||||
EWRITE(buffer, 1, nread, a);
|
||||
}
|
67
usr.bin/sort/extern.h
Normal file
67
usr.bin/sort/extern.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)extern.h 8.1 (Berkeley) 6/6/93
|
||||
*/
|
||||
|
||||
void append __P((u_char **, int, int, FILE *, void (*)(), struct field *));
|
||||
void concat __P((FILE *, FILE *));
|
||||
length_t enterkey __P((struct recheader *,
|
||||
DBT *, int, struct field *));
|
||||
void fixit __P((int *, char **));
|
||||
void fldreset __P((struct field *));
|
||||
FILE *ftmp __P((void));
|
||||
void fmerge __P((int, union f_handle,
|
||||
int, int (*)(), FILE *, void (*)(), struct field *));
|
||||
void fsort __P((int, int, union f_handle, int, FILE *, struct field *));
|
||||
int geteasy __P((int, union f_handle,
|
||||
int, struct recheader *, u_char *, struct field *));
|
||||
int getnext __P((int, union f_handle,
|
||||
int, struct recheader *, u_char *, struct field *));
|
||||
int makekey __P((int, union f_handle,
|
||||
int, struct recheader *, u_char *, struct field *));
|
||||
int makeline __P((int, union f_handle,
|
||||
int, struct recheader *, u_char *, struct field *));
|
||||
void merge __P((int, int, int (*)(), FILE *, void (*)(), struct field *));
|
||||
void num_init __P((void));
|
||||
void onepass __P((u_char **, int, long, long *, u_char *, FILE *));
|
||||
int optval __P((int, int));
|
||||
void order __P((union f_handle, int (*)(), struct field *));
|
||||
void putline __P((struct recheader *, FILE *));
|
||||
void putrec __P((struct recheader *, FILE *));
|
||||
void rd_append __P((int, union f_handle, int, FILE *, u_char *, u_char *));
|
||||
int seq __P((FILE *, DBT *, DBT *));
|
||||
int setfield __P((char *, struct field *, int));
|
||||
void settables __P((int));
|
319
usr.bin/sort/fields.c
Normal file
319
usr.bin/sort/fields.c
Normal file
@ -0,0 +1,319 @@
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char sccsid[] = "@(#)fields.c 8.1 (Berkeley) 6/6/93";
|
||||
#endif /* not lint */
|
||||
|
||||
/* Subroutines to generate sort keys. */
|
||||
|
||||
#include "sort.h"
|
||||
|
||||
#define blancmange(ptr) { \
|
||||
if (BLANK & d_mask[*(ptr)]) \
|
||||
while (BLANK & d_mask[*(++(ptr))]); \
|
||||
}
|
||||
|
||||
#define NEXTCOL(pos) { \
|
||||
if (!SEP_FLAG) \
|
||||
while (BLANK & l_d_mask[*(++pos)]); \
|
||||
while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \
|
||||
}
|
||||
|
||||
extern u_char *enterfield __P((u_char *, u_char *, struct field *, int));
|
||||
|
||||
extern u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
|
||||
|
||||
extern struct coldesc clist[(ND+1)*2];
|
||||
extern int ncols;
|
||||
|
||||
#define DECIMAL '.'
|
||||
#define OFFSET 128
|
||||
|
||||
u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
|
||||
u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
|
||||
u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
|
||||
u_char fnum[NBINS], rnum[NBINS];
|
||||
|
||||
/*
|
||||
* constructs sort key with leading recheader, followed by the key,
|
||||
* followed by the original line.
|
||||
*/
|
||||
length_t
|
||||
enterkey(keybuf, line, size, fieldtable)
|
||||
struct recheader *keybuf; /* pointer to start of key */
|
||||
DBT *line;
|
||||
int size;
|
||||
struct field fieldtable[];
|
||||
{
|
||||
int i;
|
||||
register u_char *l_d_mask;
|
||||
register u_char *lineend, *pos;
|
||||
u_char *endkey, *keypos;
|
||||
register struct coldesc *clpos;
|
||||
register int col = 1;
|
||||
struct field *ftpos;
|
||||
l_d_mask = d_mask;
|
||||
pos = (u_char *) line->data - 1;
|
||||
lineend = (u_char *) line->data + line->size-1;
|
||||
/* don't include rec_delimiter */
|
||||
keypos = keybuf->data;
|
||||
|
||||
for (i = 0; i < ncols; i++) {
|
||||
clpos = clist + i;
|
||||
for (; (col < clpos->num) && (pos < lineend); col++)
|
||||
{ NEXTCOL(pos); }
|
||||
if (pos >= lineend)
|
||||
break;
|
||||
clpos->start = SEP_FLAG ? pos + 1 : pos;
|
||||
NEXTCOL(pos);
|
||||
clpos->end = pos;
|
||||
col++;
|
||||
if (pos >= lineend) {
|
||||
clpos->end = lineend;
|
||||
++i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (; i <= ncols; i++)
|
||||
clist[i].start = clist[i].end = lineend;
|
||||
if (clist[0].start < (u_char *) line->data)
|
||||
++clist[0].start;
|
||||
endkey = (u_char *) keybuf + size - line->size;
|
||||
for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
|
||||
if ((keypos = enterfield(keypos, endkey, ftpos,
|
||||
fieldtable->flags)) == NULL)
|
||||
return (1);
|
||||
|
||||
if (UNIQUE)
|
||||
*(keypos-1) = REC_D;
|
||||
keybuf->offset = keypos - keybuf->data;
|
||||
keybuf->length = keybuf->offset + line->size;
|
||||
if (keybuf->length + sizeof(TRECHEADER) > size)
|
||||
return (1); /* line too long for buffer */
|
||||
memcpy(keybuf->data + keybuf->offset, line->data, line->size);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* constructs a field (as defined by -k) within a key
|
||||
*/
|
||||
u_char *
|
||||
enterfield(tablepos, endkey, cur_fld, gflags)
|
||||
struct field *cur_fld;
|
||||
register u_char *tablepos, *endkey;
|
||||
int gflags;
|
||||
{
|
||||
register u_char *start, *end, *lineend, *mask, *lweight;
|
||||
struct column icol, tcol;
|
||||
register u_int flags;
|
||||
u_int Rflag;
|
||||
icol = cur_fld->icol;
|
||||
tcol = cur_fld->tcol;
|
||||
flags = cur_fld->flags;
|
||||
start = icol.p->start;
|
||||
lineend = clist[ncols].end;
|
||||
if (flags & BI)
|
||||
blancmange(start);
|
||||
start += icol.indent;
|
||||
start = min(start, lineend);
|
||||
if (!tcol.num)
|
||||
end = lineend;
|
||||
else {
|
||||
if (tcol.indent) {
|
||||
end = tcol.p->start;
|
||||
if (flags & BT) blancmange(end);
|
||||
end += tcol.indent;
|
||||
end = min(end, lineend);
|
||||
} else
|
||||
end = tcol.p->end;
|
||||
}
|
||||
if (flags & N) {
|
||||
Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
|
||||
tablepos = number(tablepos, endkey, start, end, Rflag);
|
||||
return (tablepos);
|
||||
}
|
||||
mask = alltable;
|
||||
mask = cur_fld->mask;
|
||||
lweight = cur_fld->weights;
|
||||
for (; start < end; start++)
|
||||
if (mask[*start]) {
|
||||
if (*start <= 1) {
|
||||
if (tablepos+2 >= endkey)
|
||||
return (NULL);
|
||||
*tablepos++ = lweight[1];
|
||||
*tablepos++ = lweight[*start ? 2 : 1];
|
||||
} else {
|
||||
*tablepos++ = lweight[*start];
|
||||
if (tablepos == endkey)
|
||||
return (NULL);
|
||||
}
|
||||
}
|
||||
*tablepos++ = lweight[0];
|
||||
return (tablepos == endkey ? NULL : tablepos);
|
||||
}
|
||||
|
||||
/* Uses the first bin to assign sign, expsign, 0, and the first
|
||||
* 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
|
||||
* When sorting in forward order:
|
||||
* use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
|
||||
* else use (0-99)->(2-102).
|
||||
* If the exponent is >=61, use another byte for each additional 253
|
||||
* in the exponent. Cutoff is at 567.
|
||||
* To avoid confusing the exponent and the mantissa, use a field delimiter
|
||||
* if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
|
||||
* only time a field delimiter can come in that position.
|
||||
* Reverse order is done analagously.
|
||||
*/
|
||||
|
||||
u_char *
|
||||
number(pos, bufend, line, lineend, Rflag)
|
||||
register u_char *line, *pos, *bufend, *lineend;
|
||||
int Rflag;
|
||||
{
|
||||
register int or_sign, parity = 0;
|
||||
register int expincr = 1, exponent = -1;
|
||||
int bite, expsign = 1, sign = 1;
|
||||
register u_char lastvalue, *nonzero, *tline, *C_TENS;
|
||||
u_char *nweights;
|
||||
|
||||
if (Rflag)
|
||||
nweights = rnum;
|
||||
else
|
||||
nweights = fnum;
|
||||
if (pos > bufend - 8)
|
||||
return (NULL);
|
||||
/* or_sign sets the sort direction:
|
||||
* (-r: +/-)(sign: +/-)(expsign: +/-) */
|
||||
or_sign = sign ^ expsign ^ Rflag;
|
||||
blancmange(line);
|
||||
if (*line == '-') { /* set the sign */
|
||||
or_sign ^= 1;
|
||||
sign = 0;
|
||||
line++;
|
||||
}
|
||||
/* eat initial zeroes */
|
||||
for (; *line == '0' && line < lineend; line++);
|
||||
/* calculate exponents < 0 */
|
||||
if (*line == DECIMAL) {
|
||||
exponent = 1;
|
||||
while (*++line == '0' && line < lineend)
|
||||
exponent++;
|
||||
expincr = 0;
|
||||
expsign = 0;
|
||||
}
|
||||
/* next character better be a digit */
|
||||
if (*line < '1' || *line > '9' || line >= lineend) {
|
||||
*pos++ = nweights[127];
|
||||
return (pos);
|
||||
}
|
||||
if (expincr) {
|
||||
for (tline = line-1; *++tline >= '0' &&
|
||||
*tline <= '9' && tline < lineend;)
|
||||
exponent++;
|
||||
}
|
||||
if (exponent > 567) {
|
||||
*pos++ = nweights[sign ? (expsign ? 254 : 128)
|
||||
: (expsign ? 0 : 126)];
|
||||
warnx("exponent out of bounds");
|
||||
return (pos);
|
||||
}
|
||||
bite = min(exponent, 61);
|
||||
*pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
|
||||
: (expsign ? 64-bite : 64+bite)];
|
||||
if (bite >= 61) {
|
||||
do {
|
||||
exponent -= bite;
|
||||
bite = min(exponent, 254);
|
||||
*pos++ = nweights[or_sign ? 254-bite : bite];
|
||||
} while (bite == 254);
|
||||
}
|
||||
C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
|
||||
for (; line < lineend; line++) {
|
||||
if (*line >= '0' && *line <= '9') {
|
||||
if (parity) {
|
||||
*pos++ = C_TENS[lastvalue] + (or_sign ? - *line
|
||||
: *line);
|
||||
if (pos == bufend)
|
||||
return (NULL);
|
||||
if (*line != '0' || lastvalue != '0')
|
||||
nonzero = pos;
|
||||
} else
|
||||
lastvalue = *line;
|
||||
parity ^= 1;
|
||||
} else if(*line == DECIMAL) {
|
||||
if(!expincr) /* a decimal already occurred once */
|
||||
break;
|
||||
expincr = 0;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
if (parity && lastvalue != '0') {
|
||||
*pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
|
||||
OFF_TENS[lastvalue] + '0';
|
||||
} else
|
||||
pos = nonzero;
|
||||
if (pos > bufend-1)
|
||||
return (NULL);
|
||||
*pos++ = or_sign ? nweights[254] : nweights[0];
|
||||
return (pos);
|
||||
}
|
||||
|
||||
/* This forces a gap around the record delimiter
|
||||
* Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
|
||||
* rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
|
||||
*/
|
||||
void
|
||||
num_init()
|
||||
{
|
||||
int i;
|
||||
TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
|
||||
NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
|
||||
OFF_TENS = TENS - '0';
|
||||
OFF_NTENS = NEGTENS - '0';
|
||||
for (i = 1; i < 10; i++) {
|
||||
TENS[i] = TENS[i-1] + 10;
|
||||
NEGTENS[i] = NEGTENS[i-1] - 10;
|
||||
}
|
||||
for (i = 0; i < REC_D; i++) {
|
||||
fnum[i] = i;
|
||||
rnum[255-i] = i;
|
||||
}
|
||||
for (i = REC_D; i <255; i++) {
|
||||
fnum[i] = i+1;
|
||||
rnum[255-i] = i-1;
|
||||
}
|
||||
}
|
338
usr.bin/sort/files.c
Normal file
338
usr.bin/sort/files.c
Normal file
@ -0,0 +1,338 @@
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char sccsid[] = "@(#)files.c 8.1 (Berkeley) 6/6/93";
|
||||
#endif /* not lint */
|
||||
|
||||
#include "sort.h"
|
||||
#include "fsort.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* this is the subroutine for file management for fsort().
|
||||
* It keeps the buffers for all temporary files.
|
||||
*/
|
||||
int
|
||||
getnext(binno, infl0, nfiles, pos, end, dummy)
|
||||
int binno, nfiles;
|
||||
union f_handle infl0;
|
||||
register struct recheader *pos;
|
||||
register u_char *end;
|
||||
struct field *dummy;
|
||||
{
|
||||
register int i;
|
||||
register u_char *hp;
|
||||
static long nleft = 0;
|
||||
static int cnt = 0, flag = -1;
|
||||
static u_char maxb = 0;
|
||||
static FILE *fd;
|
||||
|
||||
if (nleft == 0) {
|
||||
if (binno < 0) /* reset files. */ {
|
||||
for (i = 0; i < nfiles; i++) {
|
||||
rewind(fstack[infl0.top + i].fd);
|
||||
fstack[infl0.top + i].max_o = 0;
|
||||
}
|
||||
flag = -1;
|
||||
nleft = cnt = 0;
|
||||
return(-1);
|
||||
}
|
||||
maxb = fstack[infl0.top].maxb;
|
||||
for (; nleft == 0; cnt++) {
|
||||
if (cnt >= nfiles) {
|
||||
cnt = 0;
|
||||
return (EOF);
|
||||
}
|
||||
fd = fstack[infl0.top + cnt].fd;
|
||||
hp = (u_char *) &nleft;
|
||||
for (i = sizeof(TRECHEADER); i; --i)
|
||||
*hp++ = getc(fd);
|
||||
if (binno < maxb)
|
||||
fstack[infl0.top+cnt].max_o
|
||||
+= sizeof(nleft) + nleft;
|
||||
else if (binno == maxb) {
|
||||
if (binno != fstack[infl0.top].lastb) {
|
||||
fseek(fd, fstack[infl0.top+
|
||||
cnt].max_o, SEEK_SET);
|
||||
fread(&nleft, sizeof(nleft), 1, fd);
|
||||
}
|
||||
if (nleft == 0)
|
||||
fclose(fd);
|
||||
} else if (binno == maxb + 1) { /* skip a bin */
|
||||
fseek(fd, nleft, SEEK_CUR);
|
||||
fread(&nleft, sizeof(nleft), 1, fd);
|
||||
flag = cnt;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ((u_char *) pos > end - sizeof(TRECHEADER))
|
||||
return (BUFFEND);
|
||||
hp = (u_char *) pos;
|
||||
for (i = sizeof(TRECHEADER); i ; --i)
|
||||
*hp++ = (u_char) getc(fd);
|
||||
if (end - pos->data < pos->length) {
|
||||
for (i = sizeof(TRECHEADER); i ; i--)
|
||||
ungetc(*--hp, fd);
|
||||
return (BUFFEND);
|
||||
}
|
||||
fread(pos->data, pos->length, 1, fd);
|
||||
nleft -= pos->length + sizeof(TRECHEADER);
|
||||
if (nleft == 0 && binno == fstack[infl0.top].maxb)
|
||||
fclose(fd);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* this is called when there is no special key. It's only called
|
||||
* in the first fsort pass.
|
||||
*/
|
||||
int
|
||||
makeline(flno, filelist, nfiles, buffer, bufend, dummy2)
|
||||
int flno, nfiles;
|
||||
union f_handle filelist;
|
||||
struct recheader *buffer;
|
||||
u_char *bufend;
|
||||
struct field *dummy2;
|
||||
{
|
||||
static char *opos;
|
||||
register char *end, *pos;
|
||||
static int fileno = 0, overflow = 0;
|
||||
static FILE *fd = 0;
|
||||
register int c;
|
||||
|
||||
pos = (char *) buffer->data;
|
||||
end = min((char *) bufend, pos + MAXLLEN);
|
||||
if (overflow) {
|
||||
memmove(pos, opos, bufend - (u_char *) opos);
|
||||
pos += ((char *) bufend - opos);
|
||||
overflow = 0;
|
||||
}
|
||||
for (;;) {
|
||||
if (flno >= 0) {
|
||||
if (!(fd = fstack[flno].fd))
|
||||
return (EOF);
|
||||
} else if (!fd) {
|
||||
if (fileno >= nfiles) return(EOF);
|
||||
if (!(fd = fopen(filelist.names[fileno], "r")))
|
||||
err(2, "%s", filelist.names[fileno]);
|
||||
++fileno;
|
||||
}
|
||||
while ((pos < end) && ((c = getc(fd)) != EOF)) {
|
||||
if ((*pos++ = c) == REC_D) {
|
||||
buffer->offset = 0;
|
||||
buffer->length = pos - (char *) buffer->data;
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
if (pos >= end && end == (char *) bufend) {
|
||||
if ((char *) buffer->data < end) {
|
||||
overflow = 1;
|
||||
opos = (char *) buffer->data;
|
||||
}
|
||||
return (BUFFEND);
|
||||
} else if (c == EOF) {
|
||||
if (buffer->data != (u_char *) pos) {
|
||||
warnx("last character not record delimiter");
|
||||
*pos++ = REC_D;
|
||||
buffer->offset = 0;
|
||||
buffer->length = pos - (char *) buffer->data;
|
||||
return(0);
|
||||
}
|
||||
FCLOSE(fd);
|
||||
fd = 0;
|
||||
if(flno >= 0) fstack[flno].fd = 0;
|
||||
} else {
|
||||
buffer->data[100] = '\000';
|
||||
warnx("line too long:ignoring %s...", buffer->data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This generates keys. It's only called in the first fsort pass
|
||||
*/
|
||||
int
|
||||
makekey(flno, filelist, nfiles, buffer, bufend, ftbl)
|
||||
int flno, nfiles;
|
||||
union f_handle filelist;
|
||||
struct recheader *buffer;
|
||||
u_char *bufend;
|
||||
struct field *ftbl;
|
||||
{
|
||||
static int (*get)();
|
||||
static int fileno = 0;
|
||||
static FILE *dbdesc = 0;
|
||||
static DBT dbkey[1], line[1];
|
||||
static int overflow = 0;
|
||||
int c;
|
||||
if (overflow) {
|
||||
overflow = 0;
|
||||
enterkey(buffer, line, bufend - (u_char *) buffer, ftbl);
|
||||
return (0);
|
||||
}
|
||||
for (;;) {
|
||||
if (flno >= 0) {
|
||||
get = seq;
|
||||
if (!(dbdesc = fstack[flno].fd))
|
||||
return(EOF);
|
||||
} else if (!dbdesc) {
|
||||
if (fileno >= nfiles)
|
||||
return (EOF);
|
||||
dbdesc = fopen(filelist.names[fileno], "r");
|
||||
if (!dbdesc)
|
||||
err(2, "%s", filelist.names[fileno]);
|
||||
++fileno;
|
||||
get = seq;
|
||||
}
|
||||
if (!(c = get(dbdesc, line, dbkey))) {
|
||||
if ((signed)line->size > bufend - buffer->data)
|
||||
overflow = 1;
|
||||
else
|
||||
overflow = enterkey(buffer, line,
|
||||
bufend - (u_char *) buffer, ftbl);
|
||||
if (overflow)
|
||||
return (BUFFEND);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
if (c == EOF) {
|
||||
FCLOSE(dbdesc);
|
||||
dbdesc = 0;
|
||||
if (flno >= 0) fstack[flno].fd = 0;
|
||||
} else {
|
||||
|
||||
((char *) line->data)[60] = '\000';
|
||||
warnx("line too long: ignoring %.100s...",
|
||||
(char *)line->data);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* get a key/line pair from fd
|
||||
*/
|
||||
int
|
||||
seq(fd, line, key)
|
||||
FILE *fd;
|
||||
DBT *key, *line;
|
||||
{
|
||||
static char *buf, flag = 1;
|
||||
register char *end, *pos;
|
||||
register int c;
|
||||
if (flag) {
|
||||
flag = 0;
|
||||
buf = (char *) linebuf;
|
||||
end = buf + MAXLLEN;
|
||||
line->data = buf;
|
||||
}
|
||||
pos = buf;
|
||||
while ((c = getc(fd)) != EOF) {
|
||||
if ((*pos++ = c) == REC_D) {
|
||||
line->size = pos - buf;
|
||||
return (0);
|
||||
}
|
||||
if (pos == end) {
|
||||
line->size = MAXLLEN;
|
||||
*--pos = REC_D;
|
||||
while ((c = getc(fd)) != EOF) {
|
||||
if (c == REC_D)
|
||||
return (BUFFEND);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (pos != buf) {
|
||||
warnx("last character not record delimiter");
|
||||
*pos++ = REC_D;
|
||||
line->size = pos - buf;
|
||||
return (0);
|
||||
} else
|
||||
return (EOF);
|
||||
}
|
||||
|
||||
/*
|
||||
* write a key/line pair to a temporary file
|
||||
*/
|
||||
void
|
||||
putrec(rec, fd)
|
||||
register struct recheader *rec;
|
||||
register FILE *fd;
|
||||
{
|
||||
EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fd);
|
||||
}
|
||||
|
||||
/*
|
||||
* write a line to output
|
||||
*/
|
||||
void
|
||||
putline(rec, fd)
|
||||
register struct recheader *rec;
|
||||
register FILE *fd;
|
||||
{
|
||||
EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fd);
|
||||
}
|
||||
|
||||
/*
|
||||
* get a record from a temporary file. (Used by merge sort.)
|
||||
*/
|
||||
int
|
||||
geteasy(flno, filelist, nfiles, rec, end, dummy2)
|
||||
int flno, nfiles;
|
||||
union f_handle filelist;
|
||||
register struct recheader *rec;
|
||||
register u_char *end;
|
||||
struct field *dummy2;
|
||||
{
|
||||
int i;
|
||||
FILE *fd;
|
||||
fd = fstack[flno].fd;
|
||||
if ((u_char *) rec > end - sizeof(TRECHEADER))
|
||||
return (BUFFEND);
|
||||
if (!fread(rec, 1, sizeof(TRECHEADER), fd)) {
|
||||
fclose(fd);
|
||||
fstack[flno].fd = 0;
|
||||
return (EOF);
|
||||
}
|
||||
if (end - rec->data < rec->length) {
|
||||
for (i = sizeof(TRECHEADER) - 1; i >= 0; i--)
|
||||
ungetc(*((char *) rec + i), fd);
|
||||
return (BUFFEND);
|
||||
}
|
||||
fread(rec->data, rec->length, 1, fd);
|
||||
return (0);
|
||||
}
|
286
usr.bin/sort/fsort.c
Normal file
286
usr.bin/sort/fsort.c
Normal file
@ -0,0 +1,286 @@
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char sccsid[] = "@(#)fsort.c 8.1 (Berkeley) 6/6/93";
|
||||
#endif /* not lint */
|
||||
|
||||
/*
|
||||
* Read in the next bin. If it fits in one segment sort it;
|
||||
* otherwise refine it by segment deeper by one character,
|
||||
* and try again on smaller bins. Sort the final bin at this level
|
||||
* of recursion to keep the head of fstack at 0.
|
||||
* After PANIC passes, abort to merge sort.
|
||||
*/
|
||||
#include "sort.h"
|
||||
#include "fsort.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
u_char **keylist = 0, *buffer = 0, *linebuf = 0;
|
||||
struct tempfile fstack[MAXFCT];
|
||||
extern char *toutpath;
|
||||
#define FSORTMAX 4
|
||||
int PANIC = FSORTMAX;
|
||||
|
||||
void
|
||||
fsort(binno, depth, infiles, nfiles, outfd, ftbl)
|
||||
register int binno, depth, nfiles;
|
||||
register union f_handle infiles;
|
||||
FILE *outfd;
|
||||
register struct field *ftbl;
|
||||
{
|
||||
register u_char *bufend, **keypos, *tmpbuf;
|
||||
u_char *weights;
|
||||
int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
|
||||
register int c, nelem;
|
||||
long sizes [NBINS+1];
|
||||
union f_handle tfiles, mstart = {MAXFCT-16};
|
||||
register int (*get)(int, union f_handle, int, RECHEADER *,
|
||||
u_char *, struct field *);
|
||||
register struct recheader *crec;
|
||||
struct field tfield[2];
|
||||
FILE *prevfd, *tailfd[FSORTMAX+1];
|
||||
|
||||
memset(tailfd, 0, sizeof(tailfd));
|
||||
prevfd = outfd;
|
||||
memset(tfield, 0, sizeof(tfield));
|
||||
if (ftbl[0].flags & R)
|
||||
tfield[0].weights = Rascii;
|
||||
else
|
||||
tfield[0].weights = ascii;
|
||||
tfield[0].icol.num = 1;
|
||||
weights = ftbl[0].weights;
|
||||
if (!buffer) {
|
||||
buffer = malloc(BUFSIZE);
|
||||
keylist = malloc(MAXNUM * sizeof(u_char *));
|
||||
if (!SINGL_FLD)
|
||||
linebuf = malloc(MAXLLEN);
|
||||
}
|
||||
bufend = buffer + BUFSIZE;
|
||||
if (binno >= 0) {
|
||||
tfiles.top = infiles.top + nfiles;
|
||||
get = getnext;
|
||||
} else {
|
||||
tfiles.top = 0;
|
||||
if (SINGL_FLD)
|
||||
get = makeline;
|
||||
else
|
||||
get = makekey;
|
||||
}
|
||||
for (;;) {
|
||||
memset(sizes, 0, sizeof(sizes));
|
||||
c = ntfiles = 0;
|
||||
if (binno == weights[REC_D] &&
|
||||
!(SINGL_FLD && ftbl[0].flags & F)) { /* pop */
|
||||
rd_append(weights[REC_D],
|
||||
infiles, nfiles, prevfd, buffer, bufend);
|
||||
break;
|
||||
} else if (binno == weights[REC_D]) {
|
||||
depth = 0; /* start over on flat weights */
|
||||
ftbl = tfield;
|
||||
weights = ftbl[0].weights;
|
||||
}
|
||||
while (c != EOF) {
|
||||
keypos = keylist;
|
||||
nelem = 0;
|
||||
crec = (RECHEADER *) buffer;
|
||||
while((c = get(binno, infiles, nfiles, crec, bufend,
|
||||
ftbl)) == 0) {
|
||||
*keypos++ = crec->data + depth;
|
||||
if (++nelem == MAXNUM) {
|
||||
c = BUFFEND;
|
||||
break;
|
||||
}
|
||||
crec =(RECHEADER *) ((char *) crec +
|
||||
SALIGN(crec->length) + sizeof(TRECHEADER));
|
||||
}
|
||||
if (c == BUFFEND || ntfiles || mfct) { /* push */
|
||||
if (panic >= PANIC) {
|
||||
fstack[MAXFCT-16+mfct].fd = ftmp();
|
||||
if (radixsort(keylist, nelem, weights,
|
||||
REC_D))
|
||||
err(2, NULL);
|
||||
append(keylist, nelem, depth, fstack[
|
||||
MAXFCT-16+mfct].fd, putrec, ftbl);
|
||||
mfct++;
|
||||
/* reduce number of open files */
|
||||
if (mfct == 16 ||(c == EOF && ntfiles)) {
|
||||
tmpbuf = malloc(bufend -
|
||||
crec->data);
|
||||
memmove(tmpbuf, crec->data,
|
||||
bufend - crec->data);
|
||||
fstack[tfiles.top + ntfiles].fd
|
||||
= ftmp();
|
||||
fmerge(0, mstart, mfct, geteasy,
|
||||
fstack[tfiles.top+ntfiles].fd,
|
||||
putrec, ftbl);
|
||||
++ntfiles;
|
||||
mfct = 0;
|
||||
memmove(crec->data, tmpbuf,
|
||||
bufend - crec->data);
|
||||
free(tmpbuf);
|
||||
}
|
||||
} else {
|
||||
fstack[tfiles.top + ntfiles].fd= ftmp();
|
||||
onepass(keylist, depth, nelem, sizes,
|
||||
weights, fstack[tfiles.top+ntfiles].fd);
|
||||
++ntfiles;
|
||||
}
|
||||
}
|
||||
}
|
||||
get = getnext;
|
||||
if (!ntfiles && !mfct) { /* everything in memory--pop */
|
||||
if (nelem > 1)
|
||||
if (radixsort(keylist, nelem, weights, REC_D))
|
||||
err(2, NULL);
|
||||
append(keylist, nelem, depth, outfd, putline, ftbl);
|
||||
break; /* pop */
|
||||
}
|
||||
if (panic >= PANIC) {
|
||||
if (!ntfiles)
|
||||
fmerge(0, mstart, mfct, geteasy,
|
||||
outfd, putline, ftbl);
|
||||
else
|
||||
fmerge(0, tfiles, ntfiles, geteasy,
|
||||
outfd, putline, ftbl);
|
||||
break;
|
||||
|
||||
}
|
||||
total = maxb = lastb = 0; /* find if one bin dominates */
|
||||
for (i = 0; i < NBINS; i++)
|
||||
if (sizes[i]) {
|
||||
if (sizes[i] > sizes[maxb])
|
||||
maxb = i;
|
||||
lastb = i;
|
||||
total += sizes[i];
|
||||
}
|
||||
if (sizes[maxb] < max((total / 2) , BUFSIZE))
|
||||
maxb = lastb; /* otherwise pop after last bin */
|
||||
fstack[tfiles.top].lastb = lastb;
|
||||
fstack[tfiles.top].maxb = maxb;
|
||||
|
||||
/* start refining next level. */
|
||||
get(-1, tfiles, ntfiles, crec, bufend, 0); /* rewind */
|
||||
for (i = 0; i < maxb; i++) {
|
||||
if (!sizes[i]) /* bin empty; step ahead file offset */
|
||||
get(i, tfiles, ntfiles, crec, bufend, 0);
|
||||
else
|
||||
fsort(i, depth+1, tfiles, ntfiles, outfd, ftbl);
|
||||
}
|
||||
if (lastb != maxb) {
|
||||
if (prevfd != outfd)
|
||||
tailfd[panic] = prevfd;
|
||||
prevfd = ftmp();
|
||||
for (i = maxb+1; i <= lastb; i++)
|
||||
if (!sizes[i])
|
||||
get(i, tfiles, ntfiles, crec, bufend,0);
|
||||
else
|
||||
fsort(i, depth+1, tfiles, ntfiles,
|
||||
prevfd, ftbl);
|
||||
}
|
||||
|
||||
/* sort biggest (or last) bin at this level */
|
||||
depth++;
|
||||
panic++;
|
||||
binno = maxb;
|
||||
infiles.top = tfiles.top; /* getnext will free tfiles, */
|
||||
nfiles = ntfiles; /* so overwrite them */
|
||||
}
|
||||
if (prevfd != outfd) {
|
||||
concat(outfd, prevfd);
|
||||
fclose(prevfd);
|
||||
}
|
||||
for (i = panic; i >= 0; --i)
|
||||
if (tailfd[i]) {
|
||||
concat(outfd, tailfd[i]);
|
||||
fclose(tailfd[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
This is one pass of radix exchange, dumping the bins to disk.
|
||||
*/
|
||||
#define swap(a, b, t) t = a, a = b, b = t
|
||||
void
|
||||
onepass(a, depth, n, sizes, tr, fd)
|
||||
u_char **a;
|
||||
int depth;
|
||||
long n, sizes[];
|
||||
u_char *tr;
|
||||
FILE *fd;
|
||||
{
|
||||
long tsizes[NBINS+1];
|
||||
u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp;
|
||||
static histo[256];
|
||||
int *hp;
|
||||
register int c;
|
||||
u_char **an, *t, **aj;
|
||||
register u_char **ak, *r;
|
||||
|
||||
memset(tsizes, 0, sizeof(tsizes));
|
||||
depth += sizeof(TRECHEADER);
|
||||
an = a + n;
|
||||
for (ak = a; ak < an; ak++) {
|
||||
histo[c = tr[**ak]]++;
|
||||
tsizes[c] += ((RECHEADER *) (*ak -= depth))->length;
|
||||
}
|
||||
|
||||
bin[0] = a;
|
||||
bpmax = bin + 256;
|
||||
tp = top, hp = histo;
|
||||
for (bp = bin; bp < bpmax; bp++) {
|
||||
*tp++ = *(bp+1) = *bp + (c = *hp);
|
||||
*hp++ = 0;
|
||||
if (c <= 1)
|
||||
continue;
|
||||
}
|
||||
for(aj = a; aj < an; *aj = r, aj = bin[c+1])
|
||||
for(r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;)
|
||||
swap(*ak, r, t);
|
||||
|
||||
for (ak = a, c = 0; c < 256; c++) {
|
||||
an = bin[c+1];
|
||||
n = an - ak;
|
||||
tsizes[c] += n * sizeof(TRECHEADER);
|
||||
/* tell getnext how many elements in this bin, this segment. */
|
||||
EWRITE(tsizes+c, sizeof(long), 1, fd);
|
||||
sizes[c] += tsizes[c];
|
||||
for (; ak < an; ++ak)
|
||||
putrec((RECHEADER *) *ak, fd);
|
||||
}
|
||||
}
|
60
usr.bin/sort/fsort.h
Normal file
60
usr.bin/sort/fsort.h
Normal file
@ -0,0 +1,60 @@
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)fsort.h 8.1 (Berkeley) 6/6/93
|
||||
*/
|
||||
|
||||
#define POW 20 /* exponent for buffer size */
|
||||
#define BUFSIZE (1 << POW)
|
||||
#define MAXNUM (BUFSIZE/10) /* lowish guess at average record size */
|
||||
#define BUFFEND (EOF-2)
|
||||
#define MAXFCT 1000
|
||||
#define MAXLLEN ((1 << min(POW-4, 16)) - 14)
|
||||
|
||||
extern u_char **keylist, **l2buf, *buffer, *linebuf;
|
||||
|
||||
/* temp files in the stack have a file descriptor, a largest bin (maxb)
|
||||
* which becomes the last non-empty bin (lastb) when the actual largest
|
||||
* bin is smaller than max(half the total file, BUFSIZE)
|
||||
* Max_o is the offset of maxb so it can be sought after the other bins
|
||||
* are sorted.
|
||||
*/
|
||||
struct tempfile {
|
||||
FILE *fd;
|
||||
u_char maxb;
|
||||
u_char lastb;
|
||||
long max_o;
|
||||
};
|
||||
extern struct tempfile fstack[MAXFCT];
|
326
usr.bin/sort/init.c
Normal file
326
usr.bin/sort/init.c
Normal file
@ -0,0 +1,326 @@
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char sccsid[] = "@(#)init.c 8.1 (Berkeley) 6/6/93";
|
||||
#endif /* not lint */
|
||||
|
||||
#include "sort.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
extern struct coldesc clist[(ND+1)*2];
|
||||
extern int ncols;
|
||||
u_char gweights[NBINS];
|
||||
|
||||
/*
|
||||
* clist (list of columns which correspond to one or more icol or tcol)
|
||||
* is in increasing order of columns.
|
||||
* Fields are kept in increasing order of fields.
|
||||
*/
|
||||
|
||||
/*
|
||||
* keep clist in order--inserts a column in a sorted array
|
||||
*/
|
||||
static void
|
||||
insertcol(field)
|
||||
struct field *field;
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < ncols; i++)
|
||||
if (field->icol.num <= clist[i].num)
|
||||
break;
|
||||
if (field->icol.num != clist[i].num) {
|
||||
memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
|
||||
clist[i].num = field->icol.num;
|
||||
ncols++;
|
||||
}
|
||||
if (field->tcol.num && field->tcol.num != field->icol.num) {
|
||||
for (i = 0; i < ncols; i++)
|
||||
if (field->tcol.num <= clist[i].num)
|
||||
break;
|
||||
if (field->tcol.num != clist[i].num) {
|
||||
memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
|
||||
clist[i].num = field->tcol.num;
|
||||
ncols++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* matches fields with the appropriate columns--n^2 but who cares?
|
||||
*/
|
||||
void
|
||||
fldreset(fldtab)
|
||||
struct field *fldtab;
|
||||
{
|
||||
int i;
|
||||
fldtab[0].tcol.p = clist+ncols-1;
|
||||
for (++fldtab; fldtab->icol.num; ++fldtab) {
|
||||
for (i = 0; fldtab->icol.num != clist[i].num; i++);
|
||||
fldtab->icol.p = clist + i;
|
||||
if (!fldtab->tcol.num)
|
||||
continue;
|
||||
for (i = 0; fldtab->tcol.num != clist[i].num; i++);
|
||||
fldtab->tcol.p = clist + i;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* interprets a column in a -k field
|
||||
*/
|
||||
char *
|
||||
setcolumn(pos, cur_fld, gflag)
|
||||
char *pos;
|
||||
struct field *cur_fld;
|
||||
int gflag;
|
||||
{
|
||||
struct column *col;
|
||||
int tmp;
|
||||
col = cur_fld->icol.num ? (&(*cur_fld).tcol) : (&(*cur_fld).icol);
|
||||
pos += sscanf(pos, "%d", &(col->num));
|
||||
while (isdigit(*pos))
|
||||
pos++;
|
||||
if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
|
||||
errx(2, "field numbers must be positive");
|
||||
if (*pos == '.') {
|
||||
if (!col->num)
|
||||
errx(2, "cannot indent end of line");
|
||||
pos += sscanf(++pos, "%d", &(col->indent));
|
||||
while (isdigit(*pos))
|
||||
pos++;
|
||||
if (&cur_fld->icol == col)
|
||||
col->indent--;
|
||||
if (col->indent < 0)
|
||||
errx(2, "illegal offset");
|
||||
}
|
||||
if (optval(*pos, cur_fld->tcol.num))
|
||||
while (tmp = optval(*pos, cur_fld->tcol.num)) {
|
||||
cur_fld->flags |= tmp;
|
||||
pos++;
|
||||
}
|
||||
if (cur_fld->icol.num == 0)
|
||||
cur_fld->icol.num = 1;
|
||||
return (pos);
|
||||
}
|
||||
|
||||
int
|
||||
setfield(pos, cur_fld, gflag)
|
||||
char *pos;
|
||||
struct field *cur_fld;
|
||||
int gflag;
|
||||
{
|
||||
static int nfields = 0;
|
||||
int tmp;
|
||||
char *setcolumn();
|
||||
if (++nfields == ND)
|
||||
errx(2, "too many sort keys. (Limit is %d)", ND-1);
|
||||
cur_fld->weights = ascii;
|
||||
cur_fld->mask = alltable;
|
||||
pos = setcolumn(pos, cur_fld, gflag);
|
||||
if (*pos == '\0') /* key extends to EOL. */
|
||||
cur_fld->tcol.num = 0;
|
||||
else {
|
||||
if (*pos != ',')
|
||||
errx(2, "illegal field descriptor");
|
||||
setcolumn((++pos), cur_fld, gflag);
|
||||
}
|
||||
if (!cur_fld->flags)
|
||||
cur_fld->flags = gflag;
|
||||
tmp = cur_fld->flags;
|
||||
|
||||
/*
|
||||
* Assign appropriate mask table and weight table.
|
||||
* If the global weights are reversed, the local field
|
||||
* must be "re-reversed".
|
||||
*/
|
||||
if (((tmp & R) ^ (gflag & R)) && tmp & F)
|
||||
cur_fld->weights = RFtable;
|
||||
else if (tmp & F)
|
||||
cur_fld->weights = Ftable;
|
||||
else if ((tmp & R) ^ (gflag & R))
|
||||
cur_fld->weights = Rascii;
|
||||
if (tmp & I)
|
||||
cur_fld->mask = itable;
|
||||
else if (tmp & D)
|
||||
cur_fld->mask = dtable;
|
||||
cur_fld->flags |= (gflag & (BI | BT));
|
||||
if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
|
||||
cur_fld->flags &= (D|F|I|N|R|BI);
|
||||
if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
|
||||
&& cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
|
||||
&& cur_fld->tcol.indent < cur_fld->icol.indent))
|
||||
errx(2, "fields out of order");
|
||||
insertcol(cur_fld);
|
||||
return (cur_fld->tcol.num);
|
||||
}
|
||||
|
||||
int
|
||||
optval(desc, tcolflag)
|
||||
int desc, tcolflag;
|
||||
{
|
||||
switch(desc) {
|
||||
case 'b':
|
||||
if (!tcolflag)
|
||||
return(BI);
|
||||
else
|
||||
return(BT);
|
||||
case 'd': return(D);
|
||||
case 'f': return(F);
|
||||
case 'i': return(I);
|
||||
case 'n': return(N);
|
||||
case 'r': return(R);
|
||||
default: return(0);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fixit(argc, argv)
|
||||
int *argc;
|
||||
char **argv;
|
||||
{
|
||||
int i, j, v, w, x;
|
||||
static char vbuf[ND*20], *vpos, *tpos;
|
||||
vpos = vbuf;
|
||||
|
||||
for (i = 1; i < *argc; i++) {
|
||||
if (argv[i][0] == '+') {
|
||||
tpos = argv[i]+1;
|
||||
argv[i] = vpos;
|
||||
vpos += sprintf(vpos, "-k");
|
||||
tpos += sscanf(tpos, "%d", &v);
|
||||
while (isdigit(*tpos))
|
||||
tpos++;
|
||||
vpos += sprintf(vpos, "%d", v+1);
|
||||
if (*tpos == '.') {
|
||||
tpos += sscanf(++tpos, "%d", &x);
|
||||
vpos += sprintf(vpos, ".%d", x+1);
|
||||
}
|
||||
while (*tpos)
|
||||
*vpos++ = *tpos++;
|
||||
vpos += sprintf(vpos, ",");
|
||||
if (argv[i+1] &&
|
||||
argv[i+1][0] == '-' && isdigit(argv[i+1][1])) {
|
||||
tpos = argv[i+1] + 1;
|
||||
tpos += sscanf(tpos, "%d", &w);
|
||||
while (isdigit(*tpos))
|
||||
tpos++;
|
||||
x = 0;
|
||||
if (*tpos == '.') {
|
||||
tpos += sscanf(++tpos, "%d", &x);
|
||||
while (isdigit(*tpos))
|
||||
*tpos++;
|
||||
}
|
||||
if (x) {
|
||||
vpos += sprintf(vpos, "%d", w+1);
|
||||
vpos += sprintf(vpos, ".%d", x);
|
||||
} else
|
||||
vpos += sprintf(vpos, "%d", w);
|
||||
while (*tpos)
|
||||
*vpos++ = *tpos++;
|
||||
for (j= i+1; j < *argc; j++)
|
||||
argv[j] = argv[j+1];
|
||||
*argc -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* ascii, Rascii, Ftable, and RFtable map
|
||||
* REC_D -> REC_D; {not REC_D} -> {not REC_D}.
|
||||
* gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
|
||||
* Note: when sorting in forward order, to encode character zero in a key,
|
||||
* use \001\001; character 1 becomes \001\002. In this case, character 0
|
||||
* is reserved for the field delimiter. Analagously for -r (fld_d = 255).
|
||||
* Note: this is only good for ASCII sorting. For different LC 's,
|
||||
* all bets are off. See also num_init in number.c
|
||||
*/
|
||||
void
|
||||
settables(gflags)
|
||||
int gflags;
|
||||
{
|
||||
u_char *wts;
|
||||
int i, incr;
|
||||
for (i=0; i < 256; i++) {
|
||||
ascii[i] = i;
|
||||
if (i > REC_D && i < 255 - REC_D+1)
|
||||
Rascii[i] = 255 - i + 1;
|
||||
else
|
||||
Rascii[i] = 255 - i;
|
||||
if (islower(i)) {
|
||||
Ftable[i] = Ftable[i- ('a' -'A')];
|
||||
RFtable[i] = RFtable[i - ('a' - 'A')];
|
||||
} else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
|
||||
Ftable[i] = i + 1;
|
||||
RFtable[i] = Rascii[i] - 1;
|
||||
} else {
|
||||
Ftable[i] = i;
|
||||
RFtable[i] = Rascii[i];
|
||||
}
|
||||
alltable[i] = 1;
|
||||
if (i == '\n' || isprint(i))
|
||||
itable[i] = 1;
|
||||
else itable[i] = 0;
|
||||
if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
|
||||
dtable[i] = 1;
|
||||
else dtable[i] = 0;
|
||||
}
|
||||
Rascii[REC_D] = RFtable[REC_D] = REC_D;
|
||||
if (REC_D >= 'A' && REC_D < 'Z')
|
||||
++Ftable[REC_D + ('a' - 'A')];
|
||||
if (gflags & R && (!(gflags & F) || !SINGL_FLD))
|
||||
wts = Rascii;
|
||||
else if (!(gflags & F) || !SINGL_FLD)
|
||||
wts = ascii;
|
||||
else if (gflags & R)
|
||||
wts = RFtable;
|
||||
else
|
||||
wts = Ftable;
|
||||
memmove(gweights, wts, sizeof(gweights));
|
||||
incr = (gflags & R) ? -1 : 1;
|
||||
for (i = 0; i < REC_D; i++)
|
||||
gweights[i] += incr;
|
||||
gweights[REC_D] = ((gflags & R) ? 255 : 0);
|
||||
if (SINGL_FLD && gflags & F) {
|
||||
for (i = 0; i < REC_D; i++) {
|
||||
ascii[i] += incr;
|
||||
Rascii[i] += incr;
|
||||
}
|
||||
ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
|
||||
}
|
||||
}
|
304
usr.bin/sort/msort.c
Normal file
304
usr.bin/sort/msort.c
Normal file
@ -0,0 +1,304 @@
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char sccsid[] = "@(#)msort.c 8.1 (Berkeley) 6/6/93";
|
||||
#endif /* not lint */
|
||||
|
||||
#include "sort.h"
|
||||
#include "fsort.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/* Subroutines using comparisons: merge sort and check order */
|
||||
#define DELETE (1)
|
||||
#define LALIGN(n) ((n+3) & ~3)
|
||||
|
||||
typedef struct mfile {
|
||||
u_char *end;
|
||||
short flno;
|
||||
struct recheader rec[1];
|
||||
} MFILE;
|
||||
typedef struct tmfile {
|
||||
u_char *end;
|
||||
short flno;
|
||||
struct trecheader rec[1];
|
||||
} TMFILE;
|
||||
u_char *wts, *wts1 = 0;
|
||||
struct mfile *cfilebuf;
|
||||
|
||||
static int cmp __P((struct recheader *, struct recheader *));
|
||||
static int insert __P((struct mfile **, struct mfile **, int, int));
|
||||
|
||||
void
|
||||
fmerge(binno, files, nfiles, get, outfd, fput, ftbl)
|
||||
union f_handle files;
|
||||
int binno, nfiles;
|
||||
int (*get)();
|
||||
FILE *outfd;
|
||||
void (*fput)();
|
||||
struct field *ftbl;
|
||||
{
|
||||
FILE *tout;
|
||||
int i, j, last;
|
||||
void (*put)(struct recheader *, FILE *);
|
||||
extern int geteasy();
|
||||
struct tempfile *l_fstack;
|
||||
|
||||
wts = ftbl->weights;
|
||||
if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
|
||||
wts1 = (ftbl->flags & R) ? Rascii : ascii;
|
||||
if (!cfilebuf)
|
||||
cfilebuf = malloc(MAXLLEN + sizeof(TMFILE));
|
||||
|
||||
i = min(16, nfiles) * LALIGN(MAXLLEN+sizeof(TMFILE));
|
||||
if (!buffer || i > BUFSIZE) {
|
||||
buffer = buffer ? realloc(buffer, i) : malloc(i);
|
||||
if (!buffer)
|
||||
err(2, NULL);
|
||||
if (!SINGL_FLD)
|
||||
linebuf = malloc(MAXLLEN);
|
||||
}
|
||||
|
||||
if (binno >= 0)
|
||||
l_fstack = fstack + files.top;
|
||||
else
|
||||
l_fstack = fstack;
|
||||
while (nfiles) {
|
||||
put = putrec;
|
||||
for (j = 0; j < nfiles; j += 16) {
|
||||
if (nfiles <= 16) {
|
||||
tout = outfd;
|
||||
put = fput;
|
||||
}
|
||||
else
|
||||
tout = ftmp();
|
||||
last = min(16, nfiles - j);
|
||||
if (binno < 0) {
|
||||
for (i = 0; i < last; i++)
|
||||
if (!(l_fstack[i+MAXFCT-1-16].fd =
|
||||
fopen(files.names[j + i], "r")))
|
||||
err(2, "%s", files.names[j+i]);
|
||||
merge(MAXFCT-1-16, last, get, tout, put, ftbl);
|
||||
}
|
||||
else {
|
||||
for (i = 0; i< last; i++)
|
||||
rewind(l_fstack[i+j].fd);
|
||||
merge(files.top+j, last, get, tout, put, ftbl);
|
||||
}
|
||||
if (nfiles > 16) l_fstack[j/16].fd = tout;
|
||||
}
|
||||
nfiles = (nfiles + 15) / 16;
|
||||
if (nfiles == 1)
|
||||
nfiles = 0;
|
||||
if (binno < 0) {
|
||||
binno = 0;
|
||||
get = geteasy;
|
||||
files.top = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
merge(infl0, nfiles, get, outfd, put, ftbl)
|
||||
int infl0, nfiles;
|
||||
int (*get)();
|
||||
void (*put)(struct recheader *, FILE *);
|
||||
FILE *outfd;
|
||||
struct field *ftbl;
|
||||
{
|
||||
int c, i, j;
|
||||
union f_handle dummy = {0};
|
||||
struct mfile *flist[16], *cfile;
|
||||
for (i = j = 0; i < nfiles; i++) {
|
||||
cfile = (MFILE *) (buffer +
|
||||
i * LALIGN(MAXLLEN + sizeof(TMFILE)));
|
||||
cfile->flno = j + infl0;
|
||||
cfile->end = cfile->rec->data + MAXLLEN;
|
||||
for (c = 1; c == 1;) {
|
||||
if (EOF == (c = get(j+infl0, dummy, nfiles,
|
||||
cfile->rec, cfile->end, ftbl))) {
|
||||
--i;
|
||||
--nfiles;
|
||||
break;
|
||||
}
|
||||
if (i)
|
||||
c = insert(flist, &cfile, i, !DELETE);
|
||||
else
|
||||
flist[0] = cfile;
|
||||
}
|
||||
j++;
|
||||
}
|
||||
cfile = cfilebuf;
|
||||
cfile->flno = flist[0]->flno;
|
||||
cfile->end = cfile->rec->data + MAXLLEN;
|
||||
while (nfiles) {
|
||||
for (c = 1; c == 1;) {
|
||||
if (EOF == (c = get(cfile->flno, dummy, nfiles,
|
||||
cfile->rec, cfile->end, ftbl))) {
|
||||
put(flist[0]->rec, outfd);
|
||||
memmove(flist, flist + 1,
|
||||
sizeof(MFILE *) * (--nfiles));
|
||||
cfile->flno = flist[0]->flno;
|
||||
break;
|
||||
}
|
||||
if (!(c = insert(flist, &cfile, nfiles, DELETE)))
|
||||
put(cfile->rec, outfd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
|
||||
* otherwise just inserts *rec in flist.
|
||||
*/
|
||||
static int
|
||||
insert(flist, rec, ttop, delete)
|
||||
struct mfile **flist, **rec;
|
||||
int delete, ttop; /* delete = 0 or 1 */
|
||||
{
|
||||
register struct mfile *tmprec;
|
||||
register int top, mid, bot = 0, cmpv = 1;
|
||||
tmprec = *rec;
|
||||
top = ttop;
|
||||
for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
|
||||
cmpv = cmp(tmprec->rec, flist[mid]->rec);
|
||||
if (cmpv < 0)
|
||||
top = mid;
|
||||
else if (cmpv > 0)
|
||||
bot = mid;
|
||||
else {
|
||||
if (!UNIQUE)
|
||||
bot = mid - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (delete) {
|
||||
if (UNIQUE) {
|
||||
if (!bot && cmpv)
|
||||
cmpv = cmp(tmprec->rec, flist[0]->rec);
|
||||
if (!cmpv)
|
||||
return(1);
|
||||
}
|
||||
tmprec = flist[0];
|
||||
if (bot)
|
||||
memmove(flist, flist+1, bot * sizeof(MFILE **));
|
||||
flist[bot] = *rec;
|
||||
*rec = tmprec;
|
||||
(*rec)->flno = (*flist)->flno;
|
||||
return (0);
|
||||
}
|
||||
else {
|
||||
if (!bot && !(UNIQUE && !cmpv)) {
|
||||
cmpv = cmp(tmprec->rec, flist[0]->rec);
|
||||
if (cmpv < 0)
|
||||
bot = -1;
|
||||
}
|
||||
if (UNIQUE && !cmpv)
|
||||
return (1);
|
||||
bot++;
|
||||
memmove(flist + bot+1, flist + bot,
|
||||
(ttop - bot) * sizeof(MFILE **));
|
||||
flist[bot] = *rec;
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* check order on one file
|
||||
*/
|
||||
void
|
||||
order(infile, get, ftbl)
|
||||
union f_handle infile;
|
||||
int (*get)();
|
||||
struct field *ftbl;
|
||||
{
|
||||
u_char *end;
|
||||
int c;
|
||||
struct recheader *crec, *prec, *trec;
|
||||
|
||||
if (!SINGL_FLD)
|
||||
linebuf = malloc(MAXLLEN);
|
||||
buffer = malloc(2 * (MAXLLEN + sizeof(TRECHEADER)));
|
||||
end = buffer + 2 * (MAXLLEN + sizeof(TRECHEADER));
|
||||
crec = (RECHEADER *) buffer;
|
||||
prec = (RECHEADER *) (buffer + MAXLLEN + sizeof(TRECHEADER));
|
||||
wts = ftbl->weights;
|
||||
if (SINGL_FLD && ftbl->flags & F)
|
||||
wts1 = ftbl->flags & R ? Rascii : ascii;
|
||||
else
|
||||
wts1 = 0;
|
||||
if (0 == get(-1, infile, 1, prec, end, ftbl))
|
||||
while (0 == get(-1, infile, 1, crec, end, ftbl)) {
|
||||
if (0 < (c = cmp(prec, crec))) {
|
||||
crec->data[crec->length-1] = 0;
|
||||
errx(1, "found disorder: %s", crec->data+crec->offset);
|
||||
}
|
||||
if (UNIQUE && !c) {
|
||||
crec->data[crec->length-1] = 0;
|
||||
errx(1, "found non-uniqueness: %s",
|
||||
crec->data+crec->offset);
|
||||
}
|
||||
trec = prec;
|
||||
prec = crec;
|
||||
crec = trec;
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
static int
|
||||
cmp(rec1, rec2)
|
||||
struct recheader *rec1, *rec2;
|
||||
{
|
||||
register r;
|
||||
register u_char *pos1, *pos2, *end;
|
||||
register u_char *cwts;
|
||||
for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) {
|
||||
pos1 = rec1->data;
|
||||
pos2 = rec2->data;
|
||||
if (!SINGL_FLD && UNIQUE)
|
||||
end = pos1 + min(rec1->offset, rec2->offset);
|
||||
else
|
||||
end = pos1 + min(rec1->length, rec2->length);
|
||||
for (; pos1 < end; ) {
|
||||
if (r = cwts[*pos1++] - cwts[*pos2++])
|
||||
return (r);
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
40
usr.bin/sort/pathnames.h
Normal file
40
usr.bin/sort/pathnames.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)pathnames.h 8.1 (Berkeley) 6/6/93
|
||||
*/
|
||||
|
||||
#define _PATH_STDIN "/dev/stdin"
|
||||
#define _PATH_SORTTMP "/var/tmp/sort.XXXXXXXX"
|
364
usr.bin/sort/sort.1
Normal file
364
usr.bin/sort/sort.1
Normal file
@ -0,0 +1,364 @@
|
||||
.\" Copyright (c) 1991, 1993
|
||||
.\" The Regents of the University of California. All rights reserved.
|
||||
.\"
|
||||
.\" This code is derived from software contributed to Berkeley by
|
||||
.\" the Institute of Electrical and Electronics Engineers, Inc.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\" 3. All advertising materials mentioning features or use of this software
|
||||
.\" must display the following acknowledgement:
|
||||
.\" This product includes software developed by the University of
|
||||
.\" California, Berkeley and its contributors.
|
||||
.\" 4. Neither the name of the University nor the names of its contributors
|
||||
.\" may be used to endorse or promote products derived from this software
|
||||
.\" without specific prior written permission.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" @(#)sort.1 8.1 (Berkeley) 6/6/93
|
||||
.\"
|
||||
.Dd June 6, 1993
|
||||
.Dt SORT 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm sort
|
||||
.Nd sort or merge text files
|
||||
.Sh SYNOPSIS
|
||||
.Nm sort
|
||||
.Op Fl cmubdfinr
|
||||
.Op Fl t Ar char
|
||||
.Op Fl T Ar char
|
||||
.Oo
|
||||
.Cm Fl k Ar field1[,field2]
|
||||
.Oc
|
||||
.Ar ...
|
||||
.Op Fl o Ar output
|
||||
.Op Ar file
|
||||
.Ar ...
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Nm sort
|
||||
utility
|
||||
sorts text files by lines.
|
||||
Comparisons are based on one or more sort keys extracted
|
||||
from each line of input, and are performed
|
||||
lexicographically. By default, if keys are not given,
|
||||
.Nm sort
|
||||
regards each input line as a single field.
|
||||
.Pp
|
||||
The following options are available:
|
||||
.Bl -tag -width indent
|
||||
.It Fl c
|
||||
Check that the single input file is sorted.
|
||||
If the file is not sorted,
|
||||
.Nm sort
|
||||
produces the appropriate error messages and exits with code 1;
|
||||
otherwise,
|
||||
.Nm sort
|
||||
returns 0.
|
||||
.Nm Sort
|
||||
.Fl c
|
||||
produces no output.
|
||||
.It Fl m
|
||||
Merge only; the input files are assumed to be pre-sorted.
|
||||
.It Fl o Ar output
|
||||
The argument given is the name of an
|
||||
.Ar output
|
||||
file to
|
||||
be used instead of the standard output.
|
||||
This file
|
||||
can be the same as one of the input files.
|
||||
.It Fl u
|
||||
Unique: suppress all but one in each set of lines
|
||||
having equal keys.
|
||||
If used with the
|
||||
.Fl c
|
||||
option,
|
||||
check that there are no lines with duplicate keys.
|
||||
.El
|
||||
.Pp
|
||||
The following options override the default ordering rules.
|
||||
When ordering options appear independent of key field
|
||||
specifications, the requested field ordering rules are
|
||||
applied globally to all sort keys.
|
||||
When attached to a specific key (see
|
||||
.Fl k ) ,
|
||||
the ordering options override
|
||||
all global ordering options for that key.
|
||||
.Bl -tag -width indent
|
||||
.It Fl d
|
||||
Only blank space and alphanumeric characters
|
||||
.\" according
|
||||
.\" to the current setting of LC_CTYPE
|
||||
are used
|
||||
in making comparisons.
|
||||
.It Fl f
|
||||
Considers all lowercase characters that have uppercase
|
||||
equivalents to be the same for purposes of
|
||||
comparison.
|
||||
.It Fl i
|
||||
Ignore all non-printable characters.
|
||||
.It Fl n
|
||||
An initial numeric string, consisting of optional
|
||||
blank space, optional minus sign, and zero or more
|
||||
digits (including decimal point)
|
||||
.\" with
|
||||
.\" optional radix character and thousands
|
||||
.\" separator
|
||||
.\" (as defined in the current locale),
|
||||
is sorted by arithmetic value.
|
||||
(The
|
||||
.Fl n
|
||||
option no longer implies
|
||||
the
|
||||
.Fl b
|
||||
option.)
|
||||
.It Fl r
|
||||
Reverse the sense of comparisons.
|
||||
.El
|
||||
.Pp
|
||||
The treatment of field separators can be altered using the
|
||||
options:
|
||||
.Bl -tag -width indent
|
||||
.It Fl b
|
||||
Ignores leading blank space when determining the start
|
||||
and end of a restricted sort key.
|
||||
A
|
||||
.Fl b
|
||||
option specified before the first
|
||||
.Fl k
|
||||
option applies globally to all
|
||||
.Fl k
|
||||
options.
|
||||
Otherwise, the
|
||||
.Fl b
|
||||
option can be
|
||||
attached independently to each
|
||||
.Ar field
|
||||
argument of the
|
||||
.Fl k
|
||||
option (see below).
|
||||
Note that the
|
||||
.Fl b
|
||||
option
|
||||
has no effect unless key fields are specified.
|
||||
.It Fl t Ar char
|
||||
.Ar Char
|
||||
is used as the field separator character. The initial
|
||||
.Ar char
|
||||
is not considered to be part of a field when determining
|
||||
key offsets (see below).
|
||||
Each occurrence of
|
||||
.Ar char
|
||||
is significant (for example,
|
||||
.Dq Ar charchar
|
||||
delimits an empty field).
|
||||
If
|
||||
.Fl t
|
||||
is not specified,
|
||||
blank space characters are used as default field
|
||||
separators.
|
||||
.It Fl T Ar char
|
||||
.Ar Char
|
||||
is used as the record separator character.
|
||||
This should be used with discretion;
|
||||
.Fl T Ar <alphanumeric>
|
||||
usually produces undesirable results.
|
||||
The default line separator is newline.
|
||||
.It Fl k Ar field1[,field2]
|
||||
Designates the starting position,
|
||||
.Ar field1 ,
|
||||
and optional ending position,
|
||||
.Ar field2 ,
|
||||
of a key field.
|
||||
The
|
||||
.Fl k
|
||||
option replaces the obsolescent options
|
||||
.Cm \(pl Ns Ar pos1
|
||||
and
|
||||
.Fl Ns Ar pos2 .
|
||||
.El
|
||||
.Pp
|
||||
The following operands are available:
|
||||
.Bl -tag -width indent
|
||||
.Ar file
|
||||
The pathname of a file to be sorted, merged, or checked.
|
||||
If no file
|
||||
operands are specified, or if
|
||||
a file operand is
|
||||
.Fl ,
|
||||
the standard input is used.
|
||||
.Pp
|
||||
A field is
|
||||
defined as a minimal sequence of characters followed by a
|
||||
field separator or a newline character.
|
||||
By default, the first
|
||||
blank space of a sequence of blank spaces acts as the field separator.
|
||||
All blank spaces in a sequence of blank spaces are considered
|
||||
as part of the next field; for example, all blank spaces at
|
||||
the beginning of a line are considered to be part of the
|
||||
first field.
|
||||
.Pp
|
||||
Fields are specified
|
||||
by the
|
||||
.Fl k Ar field1[,field2]
|
||||
argument. A missing
|
||||
.Ar field2
|
||||
argument defaults to the end of a line.
|
||||
.Pp
|
||||
The arguments
|
||||
.Ar field1
|
||||
and
|
||||
.Ar field2
|
||||
have the form
|
||||
.Em m.n
|
||||
followed by one or more of the options
|
||||
.Fl b , d , f , i ,
|
||||
.Fl n , r .
|
||||
A
|
||||
.Ar field1
|
||||
position specified by
|
||||
.Em m.n
|
||||
.Em (m,n > 0)
|
||||
is interpreted as the
|
||||
.Em n Ns th
|
||||
character in the
|
||||
.Em m Ns th
|
||||
field.
|
||||
A missing
|
||||
.Em \&.n
|
||||
in
|
||||
.Ar field1
|
||||
means
|
||||
.Ql \&.1 ,
|
||||
indicating the first character of the
|
||||
.Em m Ns th
|
||||
field;
|
||||
If the
|
||||
.Fl b
|
||||
option is in effect,
|
||||
.Em n
|
||||
is counted from the first
|
||||
non-blank character in the
|
||||
.Em m Ns th
|
||||
field;
|
||||
.Em m Ns \&.1b
|
||||
refers to the first
|
||||
non-blank character in the
|
||||
.Em m Ns th
|
||||
field.
|
||||
.Pp
|
||||
A
|
||||
.Ar field2
|
||||
position specified by
|
||||
.Em m.n
|
||||
is interpreted as
|
||||
the
|
||||
.Em n Ns th
|
||||
character (including separators) of the
|
||||
.Em m Ns th
|
||||
field.
|
||||
A missing
|
||||
.Em \&.n
|
||||
indicates the last character of the
|
||||
.Em m Ns th
|
||||
field;
|
||||
.Em m
|
||||
= \&0
|
||||
designates the end of a line.
|
||||
Thus the option
|
||||
.Fl k Ar v.x,w.y
|
||||
is synonymous with the obsolescent option
|
||||
.Cm \(pl Ns Ar v-\&1.x-\&1
|
||||
.Fl Ns Ar w-\&1.y ;
|
||||
when
|
||||
.Em y
|
||||
is omitted,
|
||||
.Fl k Ar v.x,w
|
||||
is synonymous with
|
||||
.Cm \(pl Ns Ar v-\&1.x-\&1
|
||||
.Fl Ns Ar w+1.0 .
|
||||
The obsolescent
|
||||
.Cm \(pl Ns Ar pos1
|
||||
.Fl Ns Ar pos2
|
||||
option is still supported, except for
|
||||
.Fl Ns Ar w\&.0b,
|
||||
which has no
|
||||
.Fl k
|
||||
equivalent.
|
||||
.Sh FILES
|
||||
.Bl -tag -width Pa -compact
|
||||
.It Pa /var/tmp/sort.*
|
||||
Default temporary directories.
|
||||
.It Pa Ar output Ns #PID
|
||||
Temporary name for
|
||||
.Ar output
|
||||
if
|
||||
.Ar output
|
||||
already exists.
|
||||
.El
|
||||
.Sh SEE ALSO
|
||||
.Xr comm 1 ,
|
||||
.Xr uniq 1 ,
|
||||
.Xr join 1
|
||||
.Sh RETURN VALUES
|
||||
Sort exits with one of the following values:
|
||||
.Bl -tag -width flag -compact
|
||||
.It Pa 0:
|
||||
normal behavior.
|
||||
.It Pa 1:
|
||||
on disorder (or non-uniqueness) with the
|
||||
.Fl c
|
||||
option
|
||||
.It Pa 2:
|
||||
an error occurred.
|
||||
.Sh BUGS
|
||||
Lines longer than 65522 characters are discarded and processing continues.
|
||||
To sort files larger than 60Mb, use
|
||||
.Nm sort
|
||||
.Fl H ;
|
||||
files larger than 704Mb must be sorted in smaller pieces, then merged.
|
||||
To protect data
|
||||
.Nm sort
|
||||
.Fl o
|
||||
calls link and unlink, and thus fails in protected directories.
|
||||
.Sh HISTORY
|
||||
A
|
||||
.Nm sort
|
||||
command appeared in
|
||||
.At v6 .
|
||||
.Sh NOTES
|
||||
The current sort command uses lexicographic radix sorting, which requires
|
||||
that sort keys be kept in memory (as opposed to previous versions which used quick
|
||||
and merge sorts and did not.)
|
||||
Thus performance depends highly on efficient choice of sort keys, and the
|
||||
.Fl b
|
||||
option and the
|
||||
.Ar field2
|
||||
argument of the
|
||||
.Fl k
|
||||
option should be used whenever possible.
|
||||
Similarly,
|
||||
.Nm sort
|
||||
.Fl k1f
|
||||
is equivalent to
|
||||
.Nm sort
|
||||
.Fl f
|
||||
and may take twice as long.
|
279
usr.bin/sort/sort.c
Normal file
279
usr.bin/sort/sort.c
Normal file
@ -0,0 +1,279 @@
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char copyright[] =
|
||||
"@(#) Copyright (c) 1993\n\
|
||||
The Regents of the University of California. All rights reserved.\n";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef lint
|
||||
static char sccsid[] = "@(#)sort.c 8.1 (Berkeley) 6/6/93";
|
||||
#endif /* not lint */
|
||||
|
||||
/* Sort sorts a file using an optional user-defined key.
|
||||
* Sort uses radix sort for internal sorting, and allows
|
||||
* a choice of merge sort and radix sort for external sorting.
|
||||
*/
|
||||
|
||||
#include "sort.h"
|
||||
#include "fsort.h"
|
||||
#include "pathnames.h"
|
||||
|
||||
#include <paths.h>
|
||||
#include <signal.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
int REC_D = '\n';
|
||||
u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
|
||||
/*
|
||||
* weight tables. Gweights is one of ascii, Rascii..
|
||||
* modified to weight rec_d = 0 (or 255)
|
||||
*/
|
||||
extern u_char gweights[NBINS];
|
||||
u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
|
||||
/*
|
||||
* masks of ignored characters. Alltable is 256 ones
|
||||
*/
|
||||
u_char dtable[NBINS], itable[NBINS], alltable[NBINS];
|
||||
int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
|
||||
struct coldesc clist[(ND+1)*2];
|
||||
int ncols = 0;
|
||||
extern struct coldesc clist[(ND+1)*2];
|
||||
extern int ncols;
|
||||
|
||||
char devstdin[] = _PATH_STDIN;
|
||||
char toutpath[_POSIX_PATH_MAX];
|
||||
|
||||
static void cleanup __P((void));
|
||||
static void onsig __P((int));
|
||||
static void usage __P((char *));
|
||||
|
||||
int
|
||||
main(argc, argv)
|
||||
int argc;
|
||||
char *argv[];
|
||||
{
|
||||
extern int optind;
|
||||
extern char *optarg;
|
||||
int (*get)();
|
||||
int ch, i, stdinflag = 0, tmp = 0;
|
||||
char cflag = 0, mflag = 0, nflag = 0;
|
||||
char *outfile, *outpath = 0;
|
||||
struct field fldtab[ND+2], *ftpos;
|
||||
union f_handle filelist;
|
||||
FILE *outfd;
|
||||
memset(fldtab, 0, (ND+2)*sizeof(struct field));
|
||||
memset(d_mask, 0, NBINS);
|
||||
d_mask[REC_D = '\n'] = REC_D_F;
|
||||
SINGL_FLD = SEP_FLAG = 0;
|
||||
d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
|
||||
ftpos = fldtab;
|
||||
fixit(&argc, argv);
|
||||
while ((ch = getopt(argc, argv, "bcdfik:mHno:rt:T:ux")) != EOF) {
|
||||
switch (ch) {
|
||||
case 'b': fldtab->flags |= BI | BT;
|
||||
break;
|
||||
case 'd':
|
||||
case 'i':
|
||||
case 'f':
|
||||
case 'r': tmp |= optval(ch, 0);
|
||||
if (tmp & R && tmp & F)
|
||||
fldtab->weights = RFtable;
|
||||
else if (tmp & F)
|
||||
fldtab->weights = Ftable;
|
||||
else if(tmp & R)
|
||||
fldtab->weights = Rascii;
|
||||
fldtab->flags |= tmp;
|
||||
break;
|
||||
case 'o':
|
||||
outpath = optarg;
|
||||
break;
|
||||
case 'n':
|
||||
nflag = 1;
|
||||
setfield("1n", ++ftpos, fldtab->flags&(~R));
|
||||
break;
|
||||
case 'k':
|
||||
setfield(optarg, ++ftpos, fldtab->flags);
|
||||
break;
|
||||
case 't':
|
||||
if (SEP_FLAG)
|
||||
usage("multiple field delimiters");
|
||||
SEP_FLAG = 1;
|
||||
d_mask[' '] &= ~FLD_D;
|
||||
d_mask['\t'] &= ~FLD_D;
|
||||
d_mask[*optarg] |= FLD_D;
|
||||
if (d_mask[*optarg] & REC_D_F)
|
||||
err(2, "record/field delimiter clash");
|
||||
break;
|
||||
case 'T':
|
||||
if (REC_D != '\n')
|
||||
usage("multiple record delimiters");
|
||||
if ('\n' == (REC_D = *optarg))
|
||||
break;
|
||||
d_mask['\n'] = d_mask[' '];
|
||||
d_mask[REC_D] = REC_D_F;
|
||||
break;
|
||||
case 'u':
|
||||
UNIQUE = 1;
|
||||
break;
|
||||
case 'c':
|
||||
cflag = 1;
|
||||
break;
|
||||
case 'm':
|
||||
mflag = 1;
|
||||
break;
|
||||
case 'H':
|
||||
PANIC = 0;
|
||||
break;
|
||||
case '?':
|
||||
default: usage("");
|
||||
}
|
||||
}
|
||||
if (cflag && argc > optind+1)
|
||||
errx(2, "too many input files for -c option");
|
||||
if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
|
||||
outpath = argv[argc-1];
|
||||
argc -= 2;
|
||||
}
|
||||
if (mflag && argc - optind > (MAXFCT - (16+1))*16)
|
||||
errx(2, "too many input files for -m option");
|
||||
for (i = optind; i < argc; i++) {
|
||||
/* allow one occurrence of /dev/stdin */
|
||||
if (!strcmp(argv[i], "-") || !strcmp(argv[i], devstdin)) {
|
||||
if (stdinflag)
|
||||
warnx("ignoring extra \"%s\" in file list",
|
||||
argv[i]);
|
||||
else {
|
||||
stdinflag = 1;
|
||||
argv[i] = devstdin;
|
||||
}
|
||||
} else if (ch = access(argv[i], R_OK))
|
||||
err(2, "%s", argv[i]);
|
||||
}
|
||||
if (!(fldtab->flags & (I|D) || fldtab[1].icol.num)) {
|
||||
SINGL_FLD = 1;
|
||||
fldtab[0].icol.num = 1;
|
||||
} else {
|
||||
if (!fldtab[1].icol.num) {
|
||||
fldtab[0].flags &= ~(BI|BT);
|
||||
setfield("1", ++ftpos, fldtab->flags);
|
||||
}
|
||||
if (nflag)
|
||||
fldtab[1].flags |= fldtab->flags;
|
||||
fldreset(fldtab);
|
||||
fldtab[0].flags &= ~F;
|
||||
}
|
||||
settables(fldtab[0].flags);
|
||||
num_init();
|
||||
fldtab->weights = gweights;
|
||||
if (optind == argc)
|
||||
argv[--optind] = devstdin;
|
||||
filelist.names = argv+optind;
|
||||
if (SINGL_FLD)
|
||||
get = makeline;
|
||||
else
|
||||
get = makekey;
|
||||
if (cflag) {
|
||||
order(filelist, get, fldtab);
|
||||
/* NOT REACHED */
|
||||
}
|
||||
if (!outpath) {
|
||||
(void)snprintf(toutpath,
|
||||
sizeof(toutpath), "%sstdout", _PATH_DEV);
|
||||
outfile = outpath = toutpath;
|
||||
} else if (!(ch = access(outpath, 0)) &&
|
||||
strncmp(_PATH_DEV, outpath, 5)) {
|
||||
struct sigaction act = {0, SIG_BLOCK, 6};
|
||||
int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, SIGXFSZ,
|
||||
SIGVTALRM, SIGPROF, 0};
|
||||
errno = 0;
|
||||
if (access(outpath, W_OK))
|
||||
err(2, "%s", outpath);
|
||||
act.sa_handler = cleanup;
|
||||
(void)snprintf(toutpath, sizeof(toutpath), "%sXXXX", outpath);
|
||||
outfile = mktemp(toutpath);
|
||||
if (!outfile)
|
||||
err(2, "%s", toutpath);
|
||||
(void)atexit(cleanup);
|
||||
for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
|
||||
sigaction(sigtable[i], &act, 0);
|
||||
} else outfile = outpath;
|
||||
if (!(outfd = fopen(outfile, "w")))
|
||||
err(2, "%s", outfile);
|
||||
if (mflag)
|
||||
fmerge(-1, filelist, argc-optind, get, outfd, putline, fldtab);
|
||||
else
|
||||
fsort(-1, 0, filelist, argc-optind, outfd, fldtab);
|
||||
if (outfile != outpath) {
|
||||
if (access(outfile, 0))
|
||||
err(2, "%s", outfile);
|
||||
(void)unlink(outpath);
|
||||
if (link(outfile, outpath))
|
||||
err(2, "cannot link %s: output left in %s",
|
||||
outpath, outfile);
|
||||
(void)unlink(outfile);
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
static void
|
||||
onsig(s)
|
||||
int s;
|
||||
{
|
||||
cleanup();
|
||||
exit(2); /* return 2 on error/interrupt */
|
||||
}
|
||||
|
||||
static void
|
||||
cleanup()
|
||||
{
|
||||
if (toutpath[0])
|
||||
(void)unlink(toutpath);
|
||||
}
|
||||
|
||||
static void
|
||||
usage(msg)
|
||||
char *msg;
|
||||
{
|
||||
if (msg)
|
||||
(void)fprintf(stderr, "sort: %s\n", msg);
|
||||
(void)fprintf(stderr, "usage: [-o output] [-cmubdfinr] [-t char] ");
|
||||
(void)fprintf(stderr, "[-T char] [-k keydef] ... [files]\n");
|
||||
exit(2);
|
||||
}
|
142
usr.bin/sort/sort.h
Normal file
142
usr.bin/sort/sort.h
Normal file
@ -0,0 +1,142 @@
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)sort.h 8.1 (Berkeley) 6/6/93
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <db.h>
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define NBINS 256
|
||||
#define MAXMERGE 16
|
||||
|
||||
/* values for masks, weights, and other flags. */
|
||||
#define I 1 /* mask out non-printable characters */
|
||||
#define D 2 /* sort alphanumeric characters only */
|
||||
#define N 4 /* Field is a number */
|
||||
#define F 8 /* weight lower and upper case the same */
|
||||
#define R 16 /* Field is reversed with respect to the global weight */
|
||||
#define BI 32 /* ignore blanks in icol */
|
||||
#define BT 64 /* ignore blanks in tcol */
|
||||
|
||||
/* masks for delimiters: blanks, fields, and termination. */
|
||||
#define BLANK 1 /* ' ', '\t'; '\n' if -T is invoked */
|
||||
#define FLD_D 2 /* ' ', '\t' default; from -t otherwise */
|
||||
#define REC_D_F 4 /* '\n' default; from -T otherwise */
|
||||
|
||||
#define ND 10 /* limit on number of -k options. */
|
||||
|
||||
#define min(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define max(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#define FCLOSE(file) { \
|
||||
if (EOF == fclose(file)) \
|
||||
err(2, "%s", file); \
|
||||
}
|
||||
|
||||
#define EWRITE(ptr, size, n, f) { \
|
||||
if (!fwrite(ptr, size, n, f)) \
|
||||
err(2, NULL); \
|
||||
}
|
||||
|
||||
/* length of record is currently limited to 2^16 - 1 */
|
||||
typedef u_short length_t;
|
||||
|
||||
#define SALIGN(n) ((n+1) & ~1)
|
||||
|
||||
/* a record is a key/line pair starting at rec.data. It has a total length
|
||||
* and an offset to the start of the line half of the pair.
|
||||
*/
|
||||
typedef struct recheader {
|
||||
length_t length;
|
||||
length_t offset;
|
||||
u_char data[1];
|
||||
} RECHEADER;
|
||||
|
||||
typedef struct trecheader {
|
||||
length_t length;
|
||||
length_t offset;
|
||||
} TRECHEADER;
|
||||
|
||||
/* This is the column as seen by struct field. It is used by enterfield.
|
||||
* They are matched with corresponding coldescs during initialization.
|
||||
*/
|
||||
struct column {
|
||||
struct coldesc *p;
|
||||
int num;
|
||||
int indent;
|
||||
};
|
||||
|
||||
/* a coldesc has a number and pointers to the beginning and end of the
|
||||
* corresponding column in the current line. This is determined in enterkey.
|
||||
*/
|
||||
typedef struct coldesc {
|
||||
u_char *start;
|
||||
u_char *end;
|
||||
int num;
|
||||
} COLDESC;
|
||||
|
||||
/* A field has an initial and final column; an omitted final column
|
||||
* implies the end of the line. Flags regulate omission of blanks and
|
||||
* numerical sorts; mask determines which characters are ignored (from -i, -d);
|
||||
* weights determines the sort weights of a character (from -f, -r).
|
||||
*/
|
||||
struct field {
|
||||
struct column icol;
|
||||
struct column tcol;
|
||||
u_int flags;
|
||||
u_char *mask;
|
||||
u_char *weights;
|
||||
};
|
||||
|
||||
union f_handle {
|
||||
int top;
|
||||
char **names;
|
||||
};
|
||||
extern int PANIC; /* maximum depth of fsort before fmerge is called */
|
||||
extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
|
||||
extern u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
|
||||
extern u_char d_mask[NBINS];
|
||||
extern int SINGL_FLD, SEP_FLAG, UNIQUE;
|
||||
extern int REC_D;
|
||||
|
||||
#include "extern.h"
|
87
usr.bin/sort/tmp.c
Normal file
87
usr.bin/sort/tmp.c
Normal file
@ -0,0 +1,87 @@
|
||||
/*-
|
||||
* Copyright (c) 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Peter McIlroy.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char sccsid[] = "@(#)tmp.c 8.1 (Berkeley) 6/6/93";
|
||||
#endif /* not lint */
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "pathnames.h"
|
||||
|
||||
#define _NAME_TMP "sort.XXXXXXXX"
|
||||
|
||||
FILE *
|
||||
ftmp()
|
||||
{
|
||||
static char *envtmp;
|
||||
sigset_t set, oset;
|
||||
static int first = 0;
|
||||
FILE *fd;
|
||||
char pathb[_POSIX_PATH_MAX], *path;
|
||||
|
||||
path = pathb;
|
||||
if (!first && !envtmp) {
|
||||
envtmp = getenv("TMPDIR");
|
||||
first = 1;
|
||||
}
|
||||
if (envtmp)
|
||||
(void)snprintf(path,
|
||||
sizeof(pathb), "%s/%s", envtmp, _NAME_TMP);
|
||||
else {
|
||||
memmove(path, _PATH_SORTTMP, sizeof(_PATH_SORTTMP));
|
||||
}
|
||||
sigfillset(&set);
|
||||
(void)sigprocmask(SIG_BLOCK, &set, &oset);
|
||||
path = mktemp(path);
|
||||
if (!path)
|
||||
err(2, "%s");
|
||||
if (!(fd = fopen(path, "w+")))
|
||||
err(2, "%s", path);
|
||||
(void)unlink(path);
|
||||
|
||||
(void)sigprocmask(SIG_SETMASK, &oset, NULL);
|
||||
return (fd);
|
||||
};
|
Loading…
Reference in New Issue
Block a user