Add RCS ids.

This commit is contained in:
mycroft 1994-06-16 05:26:34 +00:00
parent 2c1d50080b
commit 2f86deea8e
6 changed files with 617 additions and 547 deletions

View File

@ -1,6 +1,9 @@
/*-
* Copyright (c) 1991 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Ronnie Kon at Mindcraft Inc., Kevin Lew and Elmer Yglesias.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -32,14 +35,14 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
/*static char *sccsid = "from: @(#)heapsort.c 5.1 (Berkeley) 6/4/91";*/
static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
/*static char sccsid[] = "from: @(#)heapsort.c 8.1 (Berkeley) 6/4/93";*/
static char *rcsid = "$Id: heapsort.c,v 1.4 1994/06/16 05:26:34 mycroft Exp $";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
#include <sys/types.h>
#include <errno.h>
#include <stdlib.h>
#include <stddef.h>
/*
* Swap two areas of size number of bytes. Although qsort(3) permits random
@ -48,13 +51,23 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
* isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
* arithmetic gets lost in the time required for comparison function calls.
*/
#define SWAP(a, b) { \
cnt = size; \
#define SWAP(a, b, count, size, tmp) { \
count = size; \
do { \
ch = *a; \
tmp = *a; \
*a++ = *b; \
*b++ = ch; \
} while (--cnt); \
*b++ = tmp; \
} while (--count); \
}
/*
 * Copy one block of size bytes from b to a.
 *
 * a, b:	destination and source pointer expressions; each is evaluated
 *		once and is not modified.
 * count:	scratch integer lvalue used as the byte counter.
 * size:	number of bytes to copy.  It must be non-zero, because the
 *		loop body runs once before the counter is tested.
 * tmp1, tmp2:	scratch pointer lvalues that walk the destination and source.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement, even as the unbraced body of
 * an if that has an else.
 */
#define COPY(a, b, count, size, tmp1, tmp2) do { \
	(count) = (size); \
	(tmp1) = (a); \
	(tmp2) = (b); \
	do { \
		*(tmp1)++ = *(tmp2)++; \
	} while (--(count)); \
} while (0)
/*
@ -63,21 +76,59 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
*
* There are two cases. If j == nmemb, select largest of Ki and Kj. If
* j < nmemb, select largest of Ki, Kj and Kj+1.
*
* The initial value depends on if we're building the initial heap or
* reconstructing it after saving a value.
*/
#define HEAP(initval) { \
for (i = initval; (j = i * 2) <= nmemb; i = j) { \
p = (char *)bot + j * size; \
if (j < nmemb && compar(p, p + size) < 0) { \
p += size; \
++j; \
#define CREATE(initval, nmemb, par_i, child_i, par, child, size, count, tmp) { \
for (par_i = initval; (child_i = par_i * 2) <= nmemb; \
par_i = child_i) { \
child = base + child_i * size; \
if (child_i < nmemb && compar(child, child + size) < 0) { \
child += size; \
++child_i; \
} \
t = (char *)bot + i * size; \
if (compar(p, t) <= 0) \
par = base + par_i * size; \
if (compar(child, par) <= 0) \
break; \
SWAP(t, p); \
SWAP(par, child, count, size, tmp); \
} \
}
/*
* Select the top of the heap and 'heapify'. Since by far the most expensive
* action is the call to the compar function, a considerable optimization
* in the average case can be achieved due to the fact that k, the displaced
* element, is usually quite small, so it would be preferable to first
* heapify, always maintaining the invariant that the larger child is copied
* over its parent's record.
*
* Then, starting from the *bottom* of the heap, find k's correct place,
* again maintaining the invariant. As a result of the invariant no element
* is 'lost' when k is assigned its correct place in the heap.
*
* The time savings from this optimization are on the order of 15-20% for the
* average case. See Knuth, Vol. 3, page 158, problem 18.
*
* In terms of the code: the first loop starts at par_i = 1 and, at each
* level, picks the larger of the two children (via compar) and COPYs it over
* its parent. The second loop walks back up from where the first loop
* stopped: it COPYs the parent down into the child slot until either the
* root is reached (child_i == 1) or compar(k, par) < 0, and then COPYs k
* into the child slot.
*
* `base' and `compar' are not macro parameters; they are taken from the
* scope in which the macro is expanded. All other names are scratch
* variables supplied by the caller.
*
* XXX Don't break the #define SELECT line, below. Reiser cpp gets upset.
*/
#define SELECT(par_i, child_i, nmemb, par, child, size, k, count, tmp1, tmp2) { \
for (par_i = 1; (child_i = par_i * 2) <= nmemb; par_i = child_i) { \
child = base + child_i * size; \
if (child_i < nmemb && compar(child, child + size) < 0) { \
child += size; \
++child_i; \
} \
par = base + par_i * size; \
COPY(par, child, count, size, tmp1, tmp2); \
} \
for (;;) { \
child_i = par_i; \
par_i = child_i / 2; \
child = base + child_i * size; \
par = base + par_i * size; \
if (child_i == 1 || compar(k, par) < 0) { \
COPY(child, k, count, size, tmp1, tmp2); \
break; \
} \
COPY(child, par, count, size, tmp1, tmp2); \
} \
}
@ -86,41 +137,49 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
* and worst. While heapsort is faster than the worst case of quicksort,
* the BSD quicksort does median selection so that the chance of finding
* a data set that will trigger the worst case is nonexistent. Heapsort's
* only advantage over quicksort is that it requires no additional memory.
* only advantage over quicksort is that it requires little additional memory.
*/
heapsort(bot, nmemb, size, compar)
register void *bot;
register size_t nmemb, size;
int
heapsort(vbase, nmemb, size, compar)
void *vbase;
size_t nmemb, size;
int (*compar) __P((const void *, const void *));
{
register char *p, *t, ch;
register int cnt, i, j, l;
register char tmp, *tmp1, *tmp2;
char *base, *k, *p, *t;
if (nmemb <= 1)
return (0);
if (!size) {
errno = EINVAL;
return (-1);
}
if ((k = malloc(size)) == NULL)
return (-1);
/*
* Items are numbered from 1 to nmemb, so offset from size bytes
* below the starting address.
*/
bot -= size;
base = (char *)vbase - size;
for (l = nmemb / 2 + 1; --l;)
HEAP(l);
CREATE(l, nmemb, i, j, t, p, size, cnt, tmp);
/*
* For each element of the heap, save the largest element into its
* final slot, then recreate the heap.
* final slot, save the displaced element (k), then recreate the
* heap.
*/
while (nmemb > 1) {
p = (char *)bot + size;
t = (char *)bot + nmemb * size;
SWAP(p, t);
COPY(k, base + nmemb * size, cnt, size, tmp1, tmp2);
COPY(base + nmemb * size, base + size, cnt, size, tmp1, tmp2);
--nmemb;
HEAP(1);
SELECT(i, j, nmemb, t, p, size, k, cnt, tmp1, tmp2);
}
free(k);
return (0);
}

View File

@ -35,7 +35,8 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)merge.c 8.2 (Berkeley) 2/14/94";
/*static char sccsid[] = "from: @(#)merge.c 8.2 (Berkeley) 2/14/94";*/
static char *rcsid = "$Id: merge.c,v 1.2 1994/06/16 05:26:36 mycroft Exp $";
#endif /* LIBC_SCCS and not lint */
/*

View File

@ -1,5 +1,5 @@
.\" Copyright (c) 1990, 1991 The Regents of the University of California.
.\" All rights reserved.
.\" Copyright (c) 1990, 1991, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" the American National Standards Committee X3, on Information
@ -33,14 +33,14 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" from: @(#)qsort.3 6.7 (Berkeley) 6/29/91
.\" $Id: qsort.3,v 1.2 1993/08/01 07:44:22 mycroft Exp $
.\" from: @(#)qsort.3 8.1 (Berkeley) 6/4/93
.\" $Id: qsort.3,v 1.3 1994/06/16 05:26:38 mycroft Exp $
.\"
.Dd June 29, 1991
.Dd June 4, 1993
.Dt QSORT 3
.Os
.Sh NAME
.Nm qsort, heapsort
.Nm qsort, heapsort, mergesort
.Nd sort functions
.Sh SYNOPSIS
.Fd #include <stdlib.h>
@ -48,6 +48,8 @@
.Fn qsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
.Ft int
.Fn heapsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
.Ft int
.Fn mergesort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
.Sh DESCRIPTION
The
.Fn qsort
@ -55,6 +57,10 @@ function is a modified partition-exchange sort, or quicksort.
The
.Fn heapsort
function is a modified selection sort.
The
.Fn mergesort
function is a modified merge sort with exponential search
intended for sorting data with pre-existing order.
.Pp
The
.Fn qsort
@ -66,11 +72,20 @@ objects, the initial member of which is pointed to by
.Fa base .
The size of each object is specified by
.Fa size .
.Fn Mergesort
behaves similarly, but
.Em requires
that
.Fa size
be greater than
.Dq "sizeof(void *) / 2" .
.Pp
The contents of the array are sorted in ascending order according to
The contents of the array
.Fa base
are sorted in ascending order according to
a comparison function pointed to by
.Fa compar ,
which is called with two arguments that point to the objects being
which requires two arguments pointing to the objects being
compared.
.Pp
The comparison function must return an integer less than, equal to, or
@ -85,6 +100,9 @@ are
.Em not
stable, that is, if two members compare as equal, their order in
the sorted array is undefined.
The function
.Fn mergesort
is stable.
.Pp
The
.Fn qsort
@ -93,7 +111,7 @@ a variant of partition-exchange sorting; in particular, see D.E. Knuth's
Algorithm Q.
.Fn Qsort
takes O N lg N average time.
This implementation uses median selection to avoid the traditional
This implementation uses median selection to avoid its
O N**2 worst-case behavior.
.Pp
The
@ -106,7 +124,28 @@ Its
.Em only
advantage over
.Fn qsort
is that it uses no additional memory.
is that it uses almost no additional memory; while
.Fn qsort
does not allocate memory, it is implemented using recursion.
.Pp
The function
.Fn mergesort
requires additional memory of size
.Fa nmemb *
.Fa size
bytes; it should be used only when space is not at a premium.
.Fn Mergesort
is optimized for data with pre-existing order; its worst case
time is O N lg N; its best case is O N.
.Pp
Normally,
.Fn qsort
is faster than
.Fn mergesort
is faster than
.Fn heapsort .
Memory availability and pre-existing order in the data can make this
untrue.
.Sh RETURN VALUES
The
.Fn qsort
@ -115,8 +154,10 @@ returns no value.
.Pp
Upon successful completion,
.Fn heapsort
returns 0.
Otherwise, it returns \-1 and the global variable
and
.Fn mergesort
return 0.
Otherwise, they return \-1 and the global variable
.Va errno
is set to indicate the error.
.Sh ERRORS
@ -127,11 +168,23 @@ function succeeds unless:
.It Bq Er EINVAL
The
.Fa size
argument is zero.
argument is zero, or,
the
.Fa size
argument to
.Fn mergesort
is less than
.Dq "sizeof(void *) / 2" .
.It Bq Er ENOMEM
.Fn Heapsort
or
.Fn mergesort
were unable to allocate memory.
.El
.Sh COMPATIBILITY
Previous versions of
.Fn qsort
did not permit the comparison routine to itself call
did not permit the comparison routine itself to call
.Fn qsort 3 .
This is no longer true.
.Sh SEE ALSO
@ -161,6 +214,18 @@ This is no longer true.
.%T "Sorting and Searching"
.%P pp. 114-123, 145-149
.Re
.Rs
.%A McIlroy, P.M.
.%T "Optimistic Sorting and Information Theoretic Complexity"
.%J "Fourth Annual ACM-SIAM Symposium on Discrete Algorithms"
.%V January 1992
.Re
.Rs
.%A Bentley, J.L.
.%T "Engineering a Sort Function"
.%J "bentley@research.att.com"
.%V January 1992
.Re
.Sh STANDARDS
The
.Fn qsort

View File

@ -1,6 +1,6 @@
/*-
* Copyright (c) 1980, 1983, 1990 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -32,245 +32,144 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
/*static char *sccsid = "from: @(#)qsort.c 5.9 (Berkeley) 2/23/91";*/
static char *rcsid = "$Id: qsort.c,v 1.3 1993/08/26 00:48:06 jtc Exp $";
/*static char sccsid[] = "from: @(#)qsort.c 8.1 (Berkeley) 6/4/93";*/
static char *rcsid = "$Id: qsort.c,v 1.4 1994/06/16 05:26:39 mycroft Exp $";
#endif /* LIBC_SCCS and not lint */
#include <sys/types.h>
#include <stdlib.h>
/*
* MTHRESH is the smallest partition for which we compare for a median
* value instead of using the middle value.
*/
#define MTHRESH 6
static inline char *med3 __P((char *, char *, char *, int (*)()));
static inline void swapfunc __P((char *, char *, int, int));
/*
 * Smaller of a and b.  The arguments and the whole expansion are
 * parenthesized so the macro can be used inside a larger expression.
 * Each argument may be evaluated twice, so avoid side effects.
 */
#define min(a, b)	((a) < (b) ? (a) : (b))
/*
* THRESH is the minimum number of entries in a partition for continued
* partitioning.
* Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
*/
#define THRESH 4
/*
 * Exchange n bytes between the regions at parmi and parmj, moving one
 * TYPE-sized unit at a time.  The unit count is n / sizeof(TYPE); the
 * first unit is exchanged before the count is tested, so at least one
 * unit is always swapped.  Expands to a brace-enclosed block (no trailing
 * semicolon is required or expected by swapfunc()).
 */
#define swapcode(TYPE, parmi, parmj, n) { \
	long remaining = (n) / sizeof (TYPE); \
	register TYPE *lhs = (TYPE *) (parmi); \
	register TYPE *rhs = (TYPE *) (parmj); \
	for (;;) { \
		register TYPE held = *lhs; \
		*lhs++ = *rhs; \
		*rhs++ = held; \
		if (--remaining <= 0) \
			break; \
	} \
}
/*
 * Decide how elements will be exchanged and store the decision in the
 * `swaptype' variable of the enclosing scope:
 *	2  a is not aligned to sizeof(long), or es is not a multiple of
 *	   sizeof(long);
 *	0  es is exactly sizeof(long);
 *	1  otherwise (es is a larger multiple of sizeof(long)).
 * Both arguments are parenthesized so that expression arguments are
 * evaluated as a unit.  The expansion keeps its trailing semicolon for
 * compatibility with existing call sites.
 */
#define SWAPINIT(a, es) swaptype = ((char *)(a) - (char *)0) % sizeof(long) || \
	(es) % sizeof(long) ? 2 : (es) == sizeof(long) ? 0 : 1;
static inline void
swapfunc(a, b, n, swaptype)
char *a, *b;
int n, swaptype;
{
if(swaptype <= 1)
swapcode(long, a, b, n)
else
swapcode(char, a, b, n)
}
/*
 * Exchange the two elements at a and b. When swaptype is 0 the elements
 * are exchanged directly as a single long; otherwise the work is handed to
 * swapfunc() with the element size.
 *
 * `swaptype' and `es' are not macro parameters; they are taken from the
 * scope in which the macro is expanded. The expansion is an if/else
 * statement without a trailing semicolon, so the invocation must be
 * followed by one.
 */
#define swap(a, b) \
if (swaptype == 0) { \
long t = *(long *)(a); \
*(long *)(a) = *(long *)(b); \
*(long *)(b) = t; \
} else \
swapfunc(a, b, es, swaptype)
/*
 * Exchange n bytes between a and b via swapfunc(); does nothing when n is
 * not positive.  `swaptype' is taken from the enclosing scope.
 *
 * Wrapped in do { } while (0) so that the internal if cannot capture an
 * else that follows the invocation at the call site.
 */
#define vecswap(a, b, n) do { \
	if ((n) > 0) \
		swapfunc((a), (b), (n), swaptype); \
} while (0)
/*
 * Return whichever of a, b and c is the median according to cmp, using
 * at most three comparisons.  cmp is called with two element pointers and
 * its result is interpreted by sign, as for the qsort() comparison function.
 */
static inline char *
med3(a, b, c, cmp)
	char *a, *b, *c;
	int (*cmp)();
{
	if (cmp(a, b) < 0) {
		/* a sorts before b: the median is b unless c sorts before it. */
		if (cmp(b, c) < 0)
			return b;
		return cmp(a, c) < 0 ? c : a;
	}
	/* a does not sort before b: the median is b unless c sorts after it. */
	if (cmp(b, c) > 0)
		return b;
	return cmp(a, c) < 0 ? a : c;
}
void
qsort(bot, nmemb, size, compar)
void *bot;
size_t nmemb, size;
int (*compar) __P((const void *, const void *));
qsort(a, n, es, cmp)
void *a;
size_t n, es;
int (*cmp)();
{
static void insertion_sort(), quick_sort();
char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
int d, r, swaptype, swap_cnt;
if (nmemb <= 1)
loop: SWAPINIT(a, es);
swap_cnt = 0;
if (n < 7) {
for (pm = a + es; pm < (char *) a + n * es; pm += es)
for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
pl -= es)
swap(pl, pl - es);
return;
if (nmemb >= THRESH)
quick_sort(bot, nmemb, size, compar);
else
insertion_sort(bot, nmemb, size, compar);
}
/*
* Swap two areas of size number of bytes. Although qsort(3) permits random
* blocks of memory to be sorted, sorting pointers is almost certainly the
* common case (and, were it not, could easily be made so). Regardless, it
* isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
* arithmetic gets lost in the time required for comparison function calls.
*/
#define SWAP(a, b) { \
cnt = size; \
do { \
ch = *a; \
*a++ = *b; \
*b++ = ch; \
} while (--cnt); \
}
/*
* Knuth, Vol. 3, page 116, Algorithm Q, step b, argues that a single pass
* of straight insertion sort after partitioning is complete is better than
* sorting each small partition as it is created. This isn't correct in this
* implementation because comparisons require at least one (and often two)
* function calls and are likely to be the dominating expense of the sort.
* Doing a final insertion sort does more comparisons than are necessary
* because it compares the "edges" and medians of the partitions which are
* known to be already sorted.
*
* This is also the reasoning behind selecting a small THRESH value (see
* Knuth, page 122, equation 26), since the quicksort algorithm does less
* comparisons than the insertion sort.
*/
#define SORT(bot, n) { \
if (n > 1) \
if (n == 2) { \
t1 = bot + size; \
if (compar(t1, bot) < 0) \
SWAP(t1, bot); \
} else \
insertion_sort(bot, n, size, compar); \
}
static void
quick_sort(bot, nmemb, size, compar)
register char *bot;
register int size;
int nmemb, (*compar)();
{
register int cnt;
register u_char ch;
register char *top, *mid, *t1, *t2;
register int n1, n2;
char *bsv;
static void insertion_sort();
/* bot and nmemb must already be set. */
partition:
/* find mid and top elements */
mid = bot + size * (nmemb >> 1);
top = bot + (nmemb - 1) * size;
/*
* Find the median of the first, last and middle element (see Knuth,
* Vol. 3, page 123, Eq. 28). This test order gets the equalities
* right.
*/
if (nmemb >= MTHRESH) {
n1 = compar(bot, mid);
n2 = compar(mid, top);
if (n1 < 0 && n2 > 0)
t1 = compar(bot, top) < 0 ? top : bot;
else if (n1 > 0 && n2 < 0)
t1 = compar(bot, top) > 0 ? top : bot;
else
t1 = mid;
/* if mid element not selected, swap selection there */
if (t1 != mid) {
SWAP(t1, mid);
mid -= size;
}
}
/* Standard quicksort, Knuth, Vol. 3, page 116, Algorithm Q. */
#define didswap n1
#define newbot t1
#define replace t2
didswap = 0;
for (bsv = bot;;) {
for (; bot < mid && compar(bot, mid) <= 0; bot += size);
while (top > mid) {
if (compar(mid, top) <= 0) {
top -= size;
continue;
}
newbot = bot + size; /* value of bot after swap */
if (bot == mid) /* top <-> mid, mid == top */
replace = mid = top;
else { /* bot <-> top */
replace = top;
top -= size;
}
goto swap;
pm = a + (n / 2) * es;
if (n > 7) {
pl = a;
pn = a + (n - 1) * es;
if (n > 40) {
d = (n / 8) * es;
pl = med3(pl, pl + d, pl + 2 * d, cmp);
pm = med3(pm - d, pm, pm + d, cmp);
pn = med3(pn - 2 * d, pn - d, pn, cmp);
}
if (bot == mid)
pm = med3(pl, pm, pn, cmp);
}
swap(a, pm);
pa = pb = a + es;
pc = pd = a + (n - 1) * es;
for (;;) {
while (pb <= pc && (r = cmp(pb, a)) <= 0) {
if (r == 0) {
swap_cnt = 1;
swap(pa, pb);
pa += es;
}
pb += es;
}
while (pb <= pc && (r = cmp(pc, a)) >= 0) {
if (r == 0) {
swap_cnt = 1;
swap(pc, pd);
pd -= es;
}
pc -= es;
}
if (pb > pc)
break;
/* bot <-> mid, mid == bot */
replace = mid;
newbot = mid = bot; /* value of bot after swap */
top -= size;
swap: SWAP(bot, replace);
bot = newbot;
didswap = 1;
swap(pb, pc);
swap_cnt = 1;
pb += es;
pc -= es;
}
/*
* Quicksort behaves badly in the presence of data which is already
* sorted (see Knuth, Vol. 3, page 119) going from O N lg N to O N^2.
* To avoid this worst case behavior, if a re-partitioning occurs
* without swapping any elements, it is not further partitioned and
* is insert sorted. This wins big with almost sorted data sets and
* only loses if the data set is very strangely partitioned. A fix
* for those data sets would be to return prematurely if the insertion
* sort routine is forced to make an excessive number of swaps, and
* continue the partitioning.
*/
if (!didswap) {
insertion_sort(bsv, nmemb, size, compar);
if (swap_cnt == 0) { /* Switch to insertion sort */
for (pm = a + es; pm < (char *) a + n * es; pm += es)
for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
pl -= es)
swap(pl, pl - es);
return;
}
/*
* Re-partition or sort as necessary. Note that the mid element
* itself is correctly positioned and can be ignored.
*/
#define nlower n1
#define nupper n2
bot = bsv;
nlower = (mid - bot) / size; /* size of lower partition */
mid += size;
nupper = nmemb - nlower - 1; /* size of upper partition */
/*
* If must call recursively, do it on the smaller partition; this
* bounds the stack to lg N entries.
*/
if (nlower > nupper) {
if (nupper >= THRESH)
quick_sort(mid, nupper, size, compar);
else {
SORT(mid, nupper);
if (nlower < THRESH) {
SORT(bot, nlower);
return;
}
}
nmemb = nlower;
} else {
if (nlower >= THRESH)
quick_sort(bot, nlower, size, compar);
else {
SORT(bot, nlower);
if (nupper < THRESH) {
SORT(mid, nupper);
return;
}
}
bot = mid;
nmemb = nupper;
}
goto partition;
/* NOTREACHED */
}
static void
insertion_sort(bot, nmemb, size, compar)
char *bot;
register int size;
int nmemb, (*compar)();
{
register int cnt;
register u_char ch;
register char *s1, *s2, *t1, *t2, *top;
/*
* A simple insertion sort (see Knuth, Vol. 3, page 81, Algorithm
* S). Insertion sort has the same worst case as most simple sorts
* (O N^2). It gets used here because it is (O N) in the case of
* sorted data.
*/
top = bot + nmemb * size;
for (t1 = bot + size; t1 < top;) {
for (t2 = t1; (t2 -= size) >= bot && compar(t1, t2) < 0;);
if (t1 != (t2 += size)) {
/* Bubble bytes up through each element. */
for (cnt = size; cnt--; ++t1) {
ch = *t1;
for (s1 = s2 = t1; (s2 -= size) >= t2; s1 = s2)
*s1 = *s2;
*s1 = ch;
}
} else
t1 += size;
pn = a + n * es;
r = min(pa - (char *)a, pb - pa);
vecswap(a, pb - r, r);
r = min(pd - pc, pn - pd - es);
vecswap(pb, pn - r, r);
if ((r = pb - pa) > es)
qsort(a, r / es, es, cmp);
if ((r = pd - pc) > es) {
/* Iterate rather than recurse to save stack space */
a = pn - r;
n = r / es;
goto loop;
}
/* qsort(pn - r, r / es, es, cmp);*/
}

View File

@ -1,5 +1,5 @@
.\" Copyright (c) 1990, 1991 The Regents of the University of California.
.\" All rights reserved.
.\" Copyright (c) 1990, 1991, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
@ -29,10 +29,10 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" from: @(#)radixsort.3 5.5 (Berkeley) 4/19/91
.\" $Id: radixsort.3,v 1.2 1993/08/01 07:44:21 mycroft Exp $
.\" from: @(#)radixsort.3 8.2 (Berkeley) 1/27/94
.\" $Id: radixsort.3,v 1.3 1994/06/16 05:26:40 mycroft Exp $
.\"
.Dd April 19, 1991
.Dd January 27, 1994
.Dt RADIXSORT 3
.Os
.Sh NAME
@ -42,27 +42,23 @@
.Fd #include <limits.h>
.Fd #include <stdlib.h>
.Ft int
.Fn radixsort "u_char **base" "int nmemb" "u_char *table" "u_char endbyte"
.Fn radixsort "u_char **base" "int nmemb" "u_char *table" "u_int endbyte"
.Ft int
.Fn sradixsort "u_char **base" "int nmemb" "u_char *table" "u_int endbyte"
.Sh DESCRIPTION
The
.Fn radixsort
function
is a modified radix sort.
and
.Fn sradixsort
functions
are implementations of radix sort.
.Pp
The
.Fn radixsort
function sorts an array of
.Fa nmemb
pointers to byte strings, the initial member of which is referenced
by
These functions sort an array of pointers to byte strings, the initial
member of which is referenced by
.Fa base .
The byte strings may contain any values; the end of each string
is denoted by the user-specified value
.Fa endbyte .
The contents of the array are sorted in ascending order according
to the
.Tn ASCII
order of the byte strings they reference.
.Pp
Applications may specify a sort order by providing the
.Fa table
@ -74,38 +70,58 @@ must reference an array of
.Dv UCHAR_MAX
+ 1 bytes which contains the sort
weight of each possible byte value.
The end-of-string byte must have a sort weight of 0.
The end-of-string byte must have a sort weight of 0 or 255
(for sorting in reverse order).
More than one byte may have the same sort weight.
The
.Fa table
argument
is useful for applications which wish to sort different characters
equally; for example, providing a table with the same weights
equally, for example, providing a table with the same weights
for A-Z as for a-z will result in a case-insensitive sort.
If
.Fa table
is NULL, the contents of the array are sorted in ascending order
according to the
.Tn ASCII
order of the byte strings they reference and
.Fa endbyte
has a sorting weight of 0.
.Pp
The
.Fn sradixsort
function is stable, that is, if two elements compare as equal, their
order in the sorted array is unchanged.
The
.Fn sradixsort
function uses additional memory sufficient to hold
.Fa nmemb
pointers.
.Pp
The
.Fn radixsort
function
is stable, that is, if two elements compare as equal, their order in
the sorted array is unchanged.
function is not stable, but uses no additional memory.
.Pp
The
.Fn radixsort
function
is a variant of most-significant-byte radix sorting; in particular, see
D.E. Knuth's Algorithm R and section 5.2.5, exercise 10.
The
.Fn radixsort
function
takes linear time relative to the number of bytes in the strings.
These functions are variants of most-significant-byte radix sorting; in
particular, see D.E. Knuth's Algorithm R and section 5.2.5, exercise 10.
They take linear time relative to the number of bytes in the strings.
.Sh RETURN VALUES
Upon successful completion 0 is returned.
Otherwise, \-1 is returned and the global variable
.Va errno
is set to indicate the error.
.Sh ERRORS
The
.Fn radixsort
.Bl -tag -width Er
.It Bq Er EINVAL
The value of the
.Fa endbyte
element of
.Fa table
is not 0 or 255.
.El
.Pp
Additionally, the
.Fn sradixsort
function
may fail and set
.Va errno
@ -131,14 +147,15 @@ for any of the errors specified for the library routine
.%V Vol. 16
.%N No. 6
.Re
.Rs
.%A McIlroy, P.
.%D 1993
.%B "Engineering Radix Sort"
.%T "Computing Systems"
.%V Vol. 6:1
.%P pp. 5-27
.Re
.Sh HISTORY
The
.Fn radixsort
function is
.Ud .
.Sh BUGS
The
.Fa nmemb
argument
must be less than the maximum integer,
.Dv INT_MAX .
function first appeared in 4.4BSD.

View File

@ -1,6 +1,9 @@
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy and by Dan Bernstein at New York University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -32,259 +35,285 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
/*static char *sccsid = "from: @(#)radixsort.c 5.7 (Berkeley) 2/23/91";*/
static char *rcsid = "$Id: radixsort.c,v 1.3 1993/08/26 00:48:07 jtc Exp $";
/*static char sccsid[] = "from: @(#)radixsort.c 8.1 (Berkeley) 6/4/93";*/
static char *rcsid = "$Id: radixsort.c,v 1.4 1994/06/16 05:26:44 mycroft Exp $";
#endif /* LIBC_SCCS and not lint */
/*
* Radixsort routines.
*
* Program r_sort_a() is unstable but uses O(logN) extra memory for a stack.
* Use radixsort(a, n, trace, endchar) for this case.
*
* For stable sorting (using N extra pointers) use sradixsort(), which calls
* r_sort_b().
*
* For a description of this code, see D. McIlroy, P. McIlroy, K. Bostic,
* "Engineering Radix Sort".
*/
#include <sys/types.h>
#include <limits.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <errno.h>
/*
* __rspartition is the cutoff point for a further partitioning instead
* of a shellsort. If it changes check __rsshell_increments. Both of
* these are exported, as the best values are data dependent.
*/
#define NPARTITION 40
int __rspartition = NPARTITION;
int __rsshell_increments[] = { 4, 1, 0, 0, 0, 0, 0, 0 };
/*
* One saved (array, count, index) triple on the explicit work stack that
* the push() and pop() macros manipulate.
*/
typedef struct {
const u_char **sa;	/* saved array pointer (the `a' given to push()) */
int sn, si;		/* saved element count `n' and byte index `i' */
} stack;
/*
* Stackp points to context structures, where each structure schedules a
* partitioning. Radixsort exits when the stack is empty.
*
* If the buckets are placed on the stack randomly, the worst case is when
* all the buckets but one contain (npartitions + 1) elements and the bucket
* pushed on the stack last contains the rest of the elements. In this case,
* stack growth is bounded by:
*
* limit = (nelements / (npartitions + 1)) - 1;
*
* This is a very large number, 52,377,648 for the maximum 32-bit signed int.
*
* By forcing the largest bucket to be pushed on the stack first, the worst
* case is when all but two buckets each contain (npartitions + 1) elements,
* with the remaining elements split equally between the first and last
* buckets pushed on the stack. In this case, stack growth is bounded when:
*
* for (partition_cnt = 0; nelements > npartitions; ++partition_cnt)
* nelements =
* (nelements - (npartitions + 1) * (nbuckets - 2)) / 2;
* The bound is:
*
* limit = partition_cnt * (nbuckets - 1);
*
* This is a much smaller number, 4590 for the maximum 32-bit signed int.
*/
#define NBUCKETS (UCHAR_MAX + 1)
static inline void simplesort
__P((const u_char **, int, int, const u_char *, u_int));
static void r_sort_a __P((const u_char **, int, int, const u_char *, u_int));
static void r_sort_b __P((const u_char **,
const u_char **, int, int, const u_char *, u_int));
typedef struct _stack {
const u_char **bot;
int indx, nmemb;
} CONTEXT;
#define THRESHOLD 20 /* Divert to simplesort(). */
#define SIZE 512 /* Default stack size. */
#define STACKPUSH { \
stackp->bot = p; \
stackp->nmemb = nmemb; \
stackp->indx = indx; \
++stackp; \
}
#define STACKPOP { \
if (stackp == stack) \
break; \
--stackp; \
bot = stackp->bot; \
nmemb = stackp->nmemb; \
indx = stackp->indx; \
/*
* Establish the sort-weight table `tr' and normalize `endch' for the
* enclosing function. Expands in a scope that provides tab, tr, tr0, c
* and endch.
*
* tab == NULL: build a default table in tr0 in which byte values below
* endch get weight (value + 1), endch itself gets weight 0, and values
* above endch keep their own value; endch is then reset to 0.
*
* tab != NULL: use the caller's table and replace endch by its weight,
* tab[endch]. If that weight is neither 0 nor 255, errno is set to EINVAL
* and the enclosing function returns -1.
*
* NOTE(review): endch is used to index tr0/tab without a range check here;
* the visible callers declare tr0 with 256 entries -- confirm that callers
* never pass an endch above 255.
*/
#define SETUP { \
if (tab == NULL) { \
tr = tr0; \
for (c = 0; c < endch; c++) \
tr0[c] = c + 1; \
tr0[c] = 0; \
for (c++; c < 256; c++) \
tr0[c] = c; \
endch = 0; \
} else { \
endch = tab[endch]; \
tr = tab; \
if (endch != 0 && endch != 255) { \
errno = EINVAL; \
return (-1); \
} \
} \
}
/*
* A variant of MSD radix sorting; see Knuth Vol. 3, page 177, and 5.2.5,
* Ex. 10 and 12. Also, "Three Partition Refinement Algorithms, Paige
* and Tarjan, SIAM J. Comput. Vol. 16, No. 6, December 1987.
*
* This uses a simple sort as soon as a bucket crosses a cutoff point,
* rather than sorting the entire list after partitioning is finished.
* This should be an advantage.
*
* This is pure MSD instead of LSD of some number of MSD, switching to
* the simple sort as soon as possible. Takes linear time relative to
* the number of bytes in the strings.
*/
int
#if __STDC__
radixsort(const u_char **l1, int nmemb, const u_char *tab, u_char endbyte)
#else
radixsort(l1, nmemb, tab, endbyte)
const u_char **l1;
register int nmemb;
const u_char *tab;
u_char endbyte;
#endif
radixsort(a, n, tab, endch)
const u_char **a, *tab;
int n;
u_int endch;
{
register int i, indx, t1, t2;
register const u_char **l2;
register const u_char **p;
register const u_char **bot;
register const u_char *tr;
CONTEXT *stack, *stackp;
int c[NBUCKETS + 1], max;
u_char ltab[NBUCKETS];
static void shellsort();
const u_char *tr;
int c;
u_char tr0[256];
if (nmemb <= 1)
return(0);
SETUP;
r_sort_a(a, n, 0, tr, endch);
return (0);
}
/*
* T1 is the constant part of the equation, the number of elements
* represented on the stack between the top and bottom entries.
* It doesn't get rounded as the divide by 2 rounds down (correct
* for a value being subtracted). T2, the nelem value, has to be
* rounded up before each divide because we want an upper bound;
* this could overflow if nmemb is the maximum int.
*/
t1 = ((__rspartition + 1) * (NBUCKETS - 2)) >> 1;
for (i = 0, t2 = nmemb; t2 > __rspartition; i += NBUCKETS - 1)
t2 = ((t2 + 1) >> 1) - t1;
if (i) {
if (!(stack = stackp = (CONTEXT *)malloc(i * sizeof(CONTEXT))))
return(-1);
} else
stack = stackp = NULL;
int
sradixsort(a, n, tab, endch)
const u_char **a, *tab;
int n;
u_int endch;
{
const u_char *tr, **ta;
int c;
u_char tr0[256];
/*
* There are two arrays, one provided by the user (l1), and the
* temporary one (l2). The data is sorted to the temporary stack,
* and then copied back. The speedup of using index to determine
* which stack the data is on and simply swapping stacks back and
* forth, thus avoiding the copy every iteration, turns out to not
* be any faster than the current implementation.
*/
if (!(l2 = (const u_char **)malloc(sizeof(u_char *) * nmemb)))
return(-1);
/*
* Tr references a table of sort weights; multiple entries may
* map to the same weight; EOS char must have the lowest weight.
*/
if (tab)
tr = tab;
SETUP;
if (n < THRESHOLD)
simplesort(a, n, 0, tr, endch);
else {
for (t1 = 0, t2 = endbyte; t1 < t2; ++t1)
ltab[t1] = t1 + 1;
ltab[t2] = 0;
for (t1 = endbyte + 1; t1 < NBUCKETS; ++t1)
ltab[t1] = t1;
tr = ltab;
if ((ta = malloc(n * sizeof(a))) == NULL)
return (-1);
r_sort_b(a, ta, n, 0, tr, endch);
free(ta);
}
/* First sort is entire stack */
bot = l1;
indx = 0;
for (;;) {
/* Clear bucket count array */
bzero((char *)c, sizeof(c));
/*
* Compute number of items that sort to the same bucket
* for this index.
*/
for (p = bot, i = nmemb; --i >= 0;)
++c[tr[(*p++)[indx]]];
/*
* Sum the number of characters into c, dividing the temp
* stack into the right number of buckets for this bucket,
* this index. C contains the cumulative total of keys
* before and included in this bucket, and will later be
* used as an index to the bucket. c[NBUCKETS] contains
* the total number of elements, for determining how many
* elements the last bucket contains. At the same time
* find the largest bucket so it gets pushed first.
*/
for (i = max = t1 = 0, t2 = __rspartition; i <= NBUCKETS; ++i) {
if (c[i] > t2) {
t2 = c[i];
max = i;
}
t1 = c[i] += t1;
}
/*
* Partition the elements into buckets; c decrements through
* the bucket, and ends up pointing to the first element of
* the bucket.
*/
for (i = nmemb; --i >= 0;) {
--p;
l2[--c[tr[(*p)[indx]]]] = *p;
}
/* Copy the partitioned elements back to user stack */
bcopy(l2, bot, nmemb * sizeof(u_char *));
++indx;
/*
* Sort buckets as necessary; don't sort c[0], it's the
* EOS character bucket, and nothing can follow EOS.
*/
for (i = max; i; --i) {
if ((nmemb = c[i + 1] - (t1 = c[i])) < 2)
continue;
p = bot + t1;
if (nmemb > __rspartition)
STACKPUSH
else
shellsort(p, indx, nmemb, tr);
}
for (i = max + 1; i < NBUCKETS; ++i) {
if ((nmemb = c[i + 1] - (t1 = c[i])) < 2)
continue;
p = bot + t1;
if (nmemb > __rspartition)
STACKPUSH
else
shellsort(p, indx, nmemb, tr);
}
/* Break out when stack is empty */
STACKPOP
}
free((char *)l2);
free((char *)stack);
return(0);
return (0);
}
/*
* Shellsort (diminishing increment sort) from Data Structures and
* Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290;
* see also Knuth Vol. 3, page 84. The increments are selected from
* formula (8), page 95. Roughly O(N^3/2).
*/
static void
shellsort(p, indx, nmemb, tr)
register u_char **p, *tr;
register int indx, nmemb;
/*
 * Helpers for the explicit work stack used by the sort routines.
 *
 * All of them except swap() operate on a stack pointer that must be
 * named `sp' in the calling scope; `sp' points at the next free slot.
 * Stack entries have the fields sa (array), sn (count) and si (index).
 *
 *	empty(s)	true when `sp' is back at (or below) the base `s'
 *	pop(a, n, i)	step `sp' back one slot and load a, n, i from it
 *	push(a, n, i)	store a, n, i in the free slot and advance `sp'
 *	swap(a, b, t)	exchange a and b, using t as scratch
 *
 * Every argument and every expansion is parenthesized so the macros can
 * be used inside larger expressions; pop() and swap() evaluate their
 * arguments more than once, so avoid arguments with side effects.
 */
#define	empty(s)	((s) >= sp)
#define	pop(a, n, i)	((a) = (--sp)->sa, (n) = sp->sn, (i) = sp->si)
#define	push(a, n, i)	(sp->sa = (a), sp->sn = (n), (sp++)->si = (i))
#define	swap(a, b, t)	((t) = (a), (a) = (b), (b) = (t))
/*
 * Unstable, in-place sort.
 *
 * Sorts the n string pointers in a[] by the bytes found from offset i
 * onward, after mapping each byte through the table tr[].  endch is the
 * mapped value that marks the end of a string.
 *
 * Work is driven by an explicit stack of (array, count, offset) entries
 * rather than by recursion; the routine only calls itself when that
 * stack might not have room for all the bins of the current segment.
 * Segments shorter than THRESHOLD are handed to simplesort().
 *
 * count[], nc and bmin are static so that the histogram built just
 * before such a recursive call is reused by it (nc != 0 skips the
 * histogram step).  As a consequence the routine is not reentrant.
 */
void
r_sort_a(a, n, i, tr, endch)
	const u_char **a;
	int n, i;
	const u_char *tr;
	u_int endch;
{
	static int count[256], nc, bmin;
	register int c;
	register const u_char **ak, *r;
	stack s[SIZE], *sp, *sp0, *sp1, temp;
	int *cp, bigc;
	const u_char **an, *t, **aj, **top[256];

	/* Set up stack. */
	sp = s;
	push(a, n, i);
	while (!empty(s)) {
		pop(a, n, i);
		if (n < THRESHOLD) {
			simplesort(a, n, i, tr, endch);
			continue;
		}
		an = a + n;

		/* Make character histogram. */
		if (nc == 0) {
			bmin = 255;	/* First occupied bin, excluding eos. */
			for (ak = a; ak < an;) {
				c = tr[(*ak++)[i]];
				if (++count[c] == 1 && c != endch) {
					if (c < bmin)
						bmin = c;
					nc++;
				}
			}
			if (sp + nc > s + SIZE) {	/* Get more stack. */
				r_sort_a(a, n, i, tr, endch);
				continue;
			}
		}

		/*
		 * Set top[]; push incompletely sorted bins onto stack.
		 * top[] = pointers to last out-of-place element in bins.
		 * count[] = counts of elements in bins.
		 * Before permuting: top[c-1] + count[c] = top[c];
		 * during deal: top[c] counts down to top[c-1].
		 */
		sp0 = sp1 = sp;		/* Stack position of biggest bin. */
		bigc = 2;		/* Size of biggest bin. */
		if (endch == 0)		/* Special case: set top[eos]. */
			top[0] = ak = a + count[0];
		else {
			ak = a;
			top[255] = an;
		}
		for (cp = count + bmin; nc > 0; cp++) {
			while (*cp == 0)	/* Find next non-empty pile. */
				cp++;
			if (*cp > 1) {
				if (*cp > bigc) {
					bigc = *cp;
					sp1 = sp;
				}
				push(ak, *cp, i+1);
			}
			top[cp-count] = ak += *cp;
			nc--;
		}
		swap(*sp0, *sp1, temp);	/* Play it safe -- biggest bin last. */

		/*
		 * Permute misplacements home.  Already home: everything
		 * before aj, and in bin[c], items from top[c] on.
		 * Inner loop:
		 *	r = next element to put in place;
		 *	ak = top[r[i]] = location to put the next element.
		 *	aj = bottom of 1st disordered bin.
		 * Outer loop:
		 *	Once the 1st disordered bin is done, ie. aj >= ak,
		 *	aj<-aj + count[c] connects the bins in a linked list;
		 *	reset count[c].
		 */
		for (aj = a; aj < an; *aj = r, aj += count[c], count[c] = 0)
			for (r = *aj; aj < (ak = --top[c = tr[r[i]]]);)
				swap(*ak, r, t);
	}
}
/*
 * Stable sort, requiring additional memory.
 *
 * Sorts the n string pointers in a[] by the bytes found from offset i
 * onward, after mapping each byte through the table tr[].  endch is the
 * mapped value that marks the end of a string.  ta[] is scratch space
 * for n pointers: each pass copies the segment there and deals it back
 * into a[] bin by bin.
 *
 * Work is driven by an explicit stack of (array, count, offset) entries;
 * the routine only calls itself when that stack might not have room for
 * all the bins of the current segment.  Segments shorter than THRESHOLD
 * are handed to simplesort().
 *
 * count[], nc and bmin are static so that the histogram built just
 * before such a recursive call is reused by it (nc != 0 skips the
 * histogram step).  As a consequence the routine is not reentrant.
 */
void
r_sort_b(a, ta, n, i, tr, endch)
	const u_char **a, **ta;
	int n, i;
	const u_char *tr;
	u_int endch;
{
	static int count[256], nc, bmin;
	register int c;
	register const u_char **ak, **ai;
	/* Sized with SIZE so it agrees with the overflow check below. */
	stack s[SIZE], *sp, *sp0, *sp1, temp;
	const u_char **top[256];
	int *cp, bigc;

	sp = s;
	push(a, n, i);
	while (!empty(s)) {
		pop(a, n, i);
		if (n < THRESHOLD) {
			simplesort(a, n, i, tr, endch);
			continue;
		}

		/* Make character histogram (unless inherited from caller). */
		if (nc == 0) {
			bmin = 255;	/* First occupied bin, excluding eos. */
			/*
			 * Walk from the end toward a[0]; test before the
			 * decrement so no pointer below a[] is ever formed.
			 */
			for (ak = a + n; ak > a;) {
				c = tr[(*--ak)[i]];
				if (++count[c] == 1 && c != endch) {
					if (c < bmin)
						bmin = c;
					nc++;
				}
			}
			if (sp + nc > s + SIZE) {	/* Get more stack. */
				r_sort_b(a, ta, n, i, tr, endch);
				continue;
			}
		}

		/*
		 * Set top[] to the end of each bin and push every bin that
		 * holds more than one element; count[] is cleared as we go.
		 */
		sp0 = sp1 = sp;		/* Stack position of biggest bin. */
		bigc = 2;		/* Size of biggest bin. */
		if (endch == 0) {	/* The eos bin comes first ... */
			top[0] = ak = a + count[0];
			count[0] = 0;
		} else {		/* ... or is taken to be bin 255. */
			ak = a;
			top[255] = a + n;
			count[255] = 0;
		}
		for (cp = count + bmin; nc > 0; cp++) {
			while (*cp == 0)	/* Find next non-empty pile. */
				cp++;
			if ((c = *cp) > 1) {
				if (c > bigc) {
					bigc = c;
					sp1 = sp;
				}
				push(ak, c, i+1);
			}
			top[cp-count] = ak += c;
			*cp = 0;		/* Reset count[]. */
			nc--;
		}
		swap(*sp0, *sp1, temp);	/* Biggest bin is popped last. */

		for (ak = ta + n, ai = a+n; ak > ta;)	/* Copy to temp. */
			*--ak = *--ai;
		/*
		 * Deal to piles.  Going backward while each top[] counts
		 * down keeps equal keys in their original relative order.
		 */
		for (ak = ta + n; ak > ta;) {
			--ak;
			*--top[tr[(*ak)[i]]] = *ak;
		}
	}
}
/*
 * Insertion sort for short segments.
 *
 * Orders the n string pointers in a[] by comparing bytes from offset b
 * onward, each mapped through tr[]; a mapped byte equal to endch stops
 * the comparison of a pair.  Equal strings are never exchanged.
 */
static inline void
simplesort(a, n, b, tr, endch)
	register const u_char **a;
	int n, b;
	register const u_char *tr;
	u_int endch;
{
	register u_char key;
	const u_char **next, **slot, *cur, *prev, *hold;

	/* Everything below `next' is sorted; sift *next down into place. */
	for (next = a + 1; --n >= 1; next++) {
		slot = next;
		while (slot > a) {
			cur = slot[0] + b;
			prev = slot[-1] + b;

			/* Skip the common prefix, stopping at the end mark. */
			while ((key = tr[*cur]) != endch && key == tr[*prev]) {
				cur++;
				prev++;
			}

			/* Already in order with its left neighbour: done. */
			if (key >= tr[*prev])
				break;

			hold = slot[0];
			slot[0] = slot[-1];
			slot[-1] = hold;
			slot--;
		}
	}
}