Add RCS ids.

This commit is contained in:
mycroft 1994-06-16 05:26:34 +00:00
parent 2c1d50080b
commit 2f86deea8e
6 changed files with 617 additions and 547 deletions

View File

@ -1,6 +1,9 @@
/*- /*-
* Copyright (c) 1991 The Regents of the University of California. * Copyright (c) 1991, 1993
* All rights reserved. * The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Ronnie Kon at Mindcraft Inc., Kevin Lew and Elmer Yglesias.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions
@ -32,14 +35,14 @@
*/ */
#if defined(LIBC_SCCS) && !defined(lint) #if defined(LIBC_SCCS) && !defined(lint)
/*static char *sccsid = "from: @(#)heapsort.c 5.1 (Berkeley) 6/4/91";*/ /*static char sccsid[] = "from: @(#)heapsort.c 8.1 (Berkeley) 6/4/93";*/
static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $"; static char *rcsid = "$Id: heapsort.c,v 1.4 1994/06/16 05:26:34 mycroft Exp $";
#endif /* LIBC_SCCS and not lint */ #endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
#include <sys/types.h> #include <sys/types.h>
#include <errno.h> #include <errno.h>
#include <stdlib.h> #include <stdlib.h>
#include <stddef.h>
/* /*
* Swap two areas of size number of bytes. Although qsort(3) permits random * Swap two areas of size number of bytes. Although qsort(3) permits random
@ -48,13 +51,23 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
* isn't worth optimizing; the SWAP's get sped up by the cache, and pointer * isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
* arithmetic gets lost in the time required for comparison function calls. * arithmetic gets lost in the time required for comparison function calls.
*/ */
#define SWAP(a, b) { \ #define SWAP(a, b, count, size, tmp) { \
cnt = size; \ count = size; \
do { \ do { \
ch = *a; \ tmp = *a; \
*a++ = *b; \ *a++ = *b; \
*b++ = ch; \ *b++ = tmp; \
} while (--cnt); \ } while (--count); \
}
/* Copy one block of size size to another. */
#define COPY(a, b, count, size, tmp1, tmp2) { \
count = size; \
tmp1 = a; \
tmp2 = b; \
do { \
*tmp1++ = *tmp2++; \
} while (--count); \
} }
/* /*
@ -63,21 +76,59 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
* *
* There two cases. If j == nmemb, select largest of Ki and Kj. If * There two cases. If j == nmemb, select largest of Ki and Kj. If
* j < nmemb, select largest of Ki, Kj and Kj+1. * j < nmemb, select largest of Ki, Kj and Kj+1.
*
* The initial value depends on if we're building the initial heap or
* reconstructing it after saving a value.
*/ */
#define HEAP(initval) { \ #define CREATE(initval, nmemb, par_i, child_i, par, child, size, count, tmp) { \
for (i = initval; (j = i * 2) <= nmemb; i = j) { \ for (par_i = initval; (child_i = par_i * 2) <= nmemb; \
p = (char *)bot + j * size; \ par_i = child_i) { \
if (j < nmemb && compar(p, p + size) < 0) { \ child = base + child_i * size; \
p += size; \ if (child_i < nmemb && compar(child, child + size) < 0) { \
++j; \ child += size; \
++child_i; \
} \ } \
t = (char *)bot + i * size; \ par = base + par_i * size; \
if (compar(p, t) <= 0) \ if (compar(child, par) <= 0) \
break; \ break; \
SWAP(t, p); \ SWAP(par, child, count, size, tmp); \
} \
}
/*
* Select the top of the heap and 'heapify'. Since by far the most expensive
* action is the call to the compar function, a considerable optimization
* in the average case can be achieved due to the fact that k, the displaced
* element, is usually quite small, so it would be preferable to first
* heapify, always maintaining the invariant that the larger child is copied
* over its parent's record.
*
* Then, starting from the *bottom* of the heap, find k's correct place,
* again maintaining the invariant. As a result of the invariant no element
* is 'lost' when k is assigned its correct place in the heap.
*
* The time savings from this optimization are on the order of 15-20% for the
* average case. See Knuth, Vol. 3, page 158, problem 18.
*
* XXX Don't break the #define SELECT line, below. Reiser cpp gets upset.
*/
#define SELECT(par_i, child_i, nmemb, par, child, size, k, count, tmp1, tmp2) { \
for (par_i = 1; (child_i = par_i * 2) <= nmemb; par_i = child_i) { \
child = base + child_i * size; \
if (child_i < nmemb && compar(child, child + size) < 0) { \
child += size; \
++child_i; \
} \
par = base + par_i * size; \
COPY(par, child, count, size, tmp1, tmp2); \
} \
for (;;) { \
child_i = par_i; \
par_i = child_i / 2; \
child = base + child_i * size; \
par = base + par_i * size; \
if (child_i == 1 || compar(k, par) < 0) { \
COPY(child, k, count, size, tmp1, tmp2); \
break; \
} \
COPY(child, par, count, size, tmp1, tmp2); \
} \ } \
} }
@ -86,41 +137,49 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
* and worst. While heapsort is faster than the worst case of quicksort, * and worst. While heapsort is faster than the worst case of quicksort,
* the BSD quicksort does median selection so that the chance of finding * the BSD quicksort does median selection so that the chance of finding
* a data set that will trigger the worst case is nonexistent. Heapsort's * a data set that will trigger the worst case is nonexistent. Heapsort's
* only advantage over quicksort is that it requires no additional memory. * only advantage over quicksort is that it requires little additional memory.
*/ */
heapsort(bot, nmemb, size, compar) int
register void *bot; heapsort(vbase, nmemb, size, compar)
register size_t nmemb, size; void *vbase;
size_t nmemb, size;
int (*compar) __P((const void *, const void *)); int (*compar) __P((const void *, const void *));
{ {
register char *p, *t, ch;
register int cnt, i, j, l; register int cnt, i, j, l;
register char tmp, *tmp1, *tmp2;
char *base, *k, *p, *t;
if (nmemb <= 1) if (nmemb <= 1)
return (0); return (0);
if (!size) { if (!size) {
errno = EINVAL; errno = EINVAL;
return (-1); return (-1);
} }
if ((k = malloc(size)) == NULL)
return (-1);
/* /*
* Items are numbered from 1 to nmemb, so offset from size bytes * Items are numbered from 1 to nmemb, so offset from size bytes
* below the starting address. * below the starting address.
*/ */
bot -= size; base = (char *)vbase - size;
for (l = nmemb / 2 + 1; --l;) for (l = nmemb / 2 + 1; --l;)
HEAP(l); CREATE(l, nmemb, i, j, t, p, size, cnt, tmp);
/* /*
* For each element of the heap, save the largest element into its * For each element of the heap, save the largest element into its
* final slot, then recreate the heap. * final slot, save the displaced element (k), then recreate the
* heap.
*/ */
while (nmemb > 1) { while (nmemb > 1) {
p = (char *)bot + size; COPY(k, base + nmemb * size, cnt, size, tmp1, tmp2);
t = (char *)bot + nmemb * size; COPY(base + nmemb * size, base + size, cnt, size, tmp1, tmp2);
SWAP(p, t);
--nmemb; --nmemb;
HEAP(1); SELECT(i, j, nmemb, t, p, size, k, cnt, tmp1, tmp2);
} }
free(k);
return (0); return (0);
} }

View File

@ -35,7 +35,8 @@
*/ */
#if defined(LIBC_SCCS) && !defined(lint) #if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)merge.c 8.2 (Berkeley) 2/14/94"; /*static char sccsid[] = "from: @(#)merge.c 8.2 (Berkeley) 2/14/94";*/
static char *rcsid = "$Id: merge.c,v 1.2 1994/06/16 05:26:36 mycroft Exp $";
#endif /* LIBC_SCCS and not lint */ #endif /* LIBC_SCCS and not lint */
/* /*

View File

@ -1,5 +1,5 @@
.\" Copyright (c) 1990, 1991 The Regents of the University of California. .\" Copyright (c) 1990, 1991, 1993
.\" All rights reserved. .\" The Regents of the University of California. All rights reserved.
.\" .\"
.\" This code is derived from software contributed to Berkeley by .\" This code is derived from software contributed to Berkeley by
.\" the American National Standards Committee X3, on Information .\" the American National Standards Committee X3, on Information
@ -33,14 +33,14 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE. .\" SUCH DAMAGE.
.\" .\"
.\" from: @(#)qsort.3 6.7 (Berkeley) 6/29/91 .\" from: @(#)qsort.3 8.1 (Berkeley) 6/4/93
.\" $Id: qsort.3,v 1.2 1993/08/01 07:44:22 mycroft Exp $ .\" $Id: qsort.3,v 1.3 1994/06/16 05:26:38 mycroft Exp $
.\" .\"
.Dd June 29, 1991 .Dd June 4, 1993
.Dt QSORT 3 .Dt QSORT 3
.Os .Os
.Sh NAME .Sh NAME
.Nm qsort, heapsort .Nm qsort, heapsort, mergesort
.Nd sort functions .Nd sort functions
.Sh SYNOPSIS .Sh SYNOPSIS
.Fd #include <stdlib.h> .Fd #include <stdlib.h>
@ -48,6 +48,8 @@
.Fn qsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)" .Fn qsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
.Ft int .Ft int
.Fn heapsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)" .Fn heapsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
.Ft int
.Fn mergesort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
.Sh DESCRIPTION .Sh DESCRIPTION
The The
.Fn qsort .Fn qsort
@ -55,6 +57,10 @@ function is a modified partition-exchange sort, or quicksort.
The The
.Fn heapsort .Fn heapsort
function is a modified selection sort. function is a modified selection sort.
The
.Fn mergesort
function is a modified merge sort with exponential search
intended for sorting data with pre-existing order.
.Pp .Pp
The The
.Fn qsort .Fn qsort
@ -66,11 +72,20 @@ objects, the initial member of which is pointed to by
.Fa base . .Fa base .
The size of each object is specified by The size of each object is specified by
.Fa size . .Fa size .
.Fn Mergesort
behaves similarly, but
.Em requires
that
.Fa size
be greater than
.Dq "sizeof(void *) / 2" .
.Pp .Pp
The contents of the array are sorted in ascending order according to The contents of the array
.Fa base
are sorted in ascending order according to
a comparison function pointed to by a comparison function pointed to by
.Fa compar , .Fa compar ,
which is called with two arguments that point to the objects being which requires two arguments pointing to the objects being
compared. compared.
.Pp .Pp
The comparison function must return an integer less than, equal to, or The comparison function must return an integer less than, equal to, or
@ -85,6 +100,9 @@ are
.Em not .Em not
stable, that is, if two members compare as equal, their order in stable, that is, if two members compare as equal, their order in
the sorted array is undefined. the sorted array is undefined.
The function
.Fn mergesort
is stable.
.Pp .Pp
The The
.Fn qsort .Fn qsort
@ -93,7 +111,7 @@ a variant of partition-exchange sorting; in particular, see D.E. Knuth's
Algorithm Q. Algorithm Q.
.Fn Qsort .Fn Qsort
takes O N lg N average time. takes O N lg N average time.
This implementation uses median selection to avoid the traditional This implementation uses median selection to avoid its
O N**2 worst-case behavior. O N**2 worst-case behavior.
.Pp .Pp
The The
@ -106,7 +124,28 @@ Its
.Em only .Em only
advantage over advantage over
.Fn qsort .Fn qsort
is that it uses no additional memory. is that it uses almost no additional memory; while
.Fn qsort
does not allocate memory, it is implemented using recursion.
.Pp
The function
.Fn mergesort
requires additional memory of size
.Fa nmemb *
.Fa size
bytes; it should be used only when space is not at a premium.
.Fn Mergesort
is optimized for data with pre-existing order; its worst case
time is O N lg N; its best case is O N.
.Pp
Normally,
.Fn qsort
is faster than
.Fn mergesort
is faster than
.Fn heapsort .
Memory availability and pre-existing order in the data can make this
untrue.
.Sh RETURN VALUES .Sh RETURN VALUES
The The
.Fn qsort .Fn qsort
@ -115,8 +154,10 @@ returns no value.
.Pp .Pp
Upon successful completion, Upon successful completion,
.Fn heapsort .Fn heapsort
returns 0. and
Otherwise, it returns \-1 and the global variable .Fn mergesort
return 0.
Otherwise, they return \-1 and the global variable
.Va errno .Va errno
is set to indicate the error. is set to indicate the error.
.Sh ERRORS .Sh ERRORS
@ -127,11 +168,23 @@ function succeeds unless:
.It Bq Er EINVAL .It Bq Er EINVAL
The The
.Fa size .Fa size
argument is zero. argument is zero, or,
the
.Fa size
argument to
.Fn mergesort
is less than
.Dq "sizeof(void *) / 2" .
.It Bq Er ENOMEM
.Fn Heapsort
or
.Fn mergesort
was unable to allocate memory.
.El
.Sh COMPATIBILITY .Sh COMPATIBILITY
Previous versions of Previous versions of
.Fn qsort .Fn qsort
did not permit the comparison routine to itself call did not permit the comparison routine itself to call
.Fn qsort 3 . .Fn qsort 3 .
This is no longer true. This is no longer true.
.Sh SEE ALSO .Sh SEE ALSO
@ -161,6 +214,18 @@ This is no longer true.
.%T "Sorting and Searching" .%T "Sorting and Searching"
.%P pp. 114-123, 145-149 .%P pp. 114-123, 145-149
.Re .Re
.Rs
.%A McIlroy, P.M.
.%T "Optimistic Sorting and Information Theoretic Complexity"
.%J "Fourth Annual ACM-SIAM Symposium on Discrete Algorithms"
.%V January 1992
.Re
.Rs
.%A Bentley, J.L.
.%T "Engineering a Sort Function"
.%J "bentley@research.att.com"
.%V January 1992
.Re
.Sh STANDARDS .Sh STANDARDS
The The
.Fn qsort .Fn qsort

View File

@ -1,6 +1,6 @@
/*- /*-
* Copyright (c) 1980, 1983, 1990 The Regents of the University of California. * Copyright (c) 1992, 1993
* All rights reserved. * The Regents of the University of California. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions
@ -32,245 +32,144 @@
*/ */
#if defined(LIBC_SCCS) && !defined(lint) #if defined(LIBC_SCCS) && !defined(lint)
/*static char *sccsid = "from: @(#)qsort.c 5.9 (Berkeley) 2/23/91";*/ /*static char sccsid[] = "from: @(#)qsort.c 8.1 (Berkeley) 6/4/93";*/
static char *rcsid = "$Id: qsort.c,v 1.3 1993/08/26 00:48:06 jtc Exp $"; static char *rcsid = "$Id: qsort.c,v 1.4 1994/06/16 05:26:39 mycroft Exp $";
#endif /* LIBC_SCCS and not lint */ #endif /* LIBC_SCCS and not lint */
#include <sys/types.h> #include <sys/types.h>
#include <stdlib.h> #include <stdlib.h>
/* static inline char *med3 __P((char *, char *, char *, int (*)()));
* MTHRESH is the smallest partition for which we compare for a median static inline void swapfunc __P((char *, char *, int, int));
* value instead of using the middle value.
*/ #define min(a, b) (a) < (b) ? a : b
#define MTHRESH 6
/* /*
* THRESH is the minimum number of entries in a partition for continued * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
* partitioning.
*/ */
#define THRESH 4 #define swapcode(TYPE, parmi, parmj, n) { \
long i = (n) / sizeof (TYPE); \
register TYPE *pi = (TYPE *) (parmi); \
register TYPE *pj = (TYPE *) (parmj); \
do { \
register TYPE t = *pi; \
*pi++ = *pj; \
*pj++ = t; \
} while (--i > 0); \
}
#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
static inline void
swapfunc(a, b, n, swaptype)
char *a, *b;
int n, swaptype;
{
if(swaptype <= 1)
swapcode(long, a, b, n)
else
swapcode(char, a, b, n)
}
/*
 * swap(a, b): exchange the two elements at a and b.  Relies on the
 * caller's local variables `swaptype' and `es'.  When swaptype is 0 the
 * elements are exchanged with a single long assignment; otherwise the
 * work is handed to swapfunc() with the element size es.
 *
 * vecswap(a, b, n): exchange n bytes starting at a and b via swapfunc();
 * does nothing when n is not positive.
 *
 * Both macros are wrapped in do { ... } while (0) so that each expands to
 * exactly one statement.  The previous bare-`if' form of vecswap would
 * capture a following `else' written by the caller.  Arguments may be
 * evaluated more than once, so they must be free of side effects.
 */
#define swap(a, b) \
	do { \
		if (swaptype == 0) { \
			long t = *(long *)(a); \
			*(long *)(a) = *(long *)(b); \
			*(long *)(b) = t; \
		} else \
			swapfunc(a, b, es, swaptype); \
	} while (0)

#define vecswap(a, b, n) \
	do { \
		if ((n) > 0) \
			swapfunc(a, b, n, swaptype); \
	} while (0)
/*
 * Return whichever of the pointers a, b, c refers to the median of the
 * three elements, as ordered by cmp.  cmp follows the qsort(3)
 * convention: negative, zero or positive for less, equal or greater.
 * At most three comparisons are made, in the order (a,b), (b,c), (a,c).
 */
static inline char *
med3(a, b, c, cmp)
	char *a, *b, *c;
	int (*cmp)();
{
	if (cmp(a, b) < 0) {
		/* a < b: the median is b unless c falls at or below b. */
		if (cmp(b, c) < 0)
			return b;
		if (cmp(a, c) < 0)
			return c;
		return a;
	}

	/* a >= b: the median is b unless c falls at or above b. */
	if (cmp(b, c) > 0)
		return b;
	if (cmp(a, c) < 0)
		return a;
	return c;
}
void void
qsort(bot, nmemb, size, compar) qsort(a, n, es, cmp)
void *bot; void *a;
size_t nmemb, size; size_t n, es;
int (*compar) __P((const void *, const void *)); int (*cmp)();
{ {
static void insertion_sort(), quick_sort(); char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
int d, r, swaptype, swap_cnt;
if (nmemb <= 1) loop: SWAPINIT(a, es);
swap_cnt = 0;
if (n < 7) {
for (pm = a + es; pm < (char *) a + n * es; pm += es)
for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
pl -= es)
swap(pl, pl - es);
return; return;
if (nmemb >= THRESH)
quick_sort(bot, nmemb, size, compar);
else
insertion_sort(bot, nmemb, size, compar);
}
/*
* Swap two areas of size number of bytes. Although qsort(3) permits random
* blocks of memory to be sorted, sorting pointers is almost certainly the
* common case (and, were it not, could easily be made so). Regardless, it
* isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
* arithmetic gets lost in the time required for comparison function calls.
*/
#define SWAP(a, b) { \
cnt = size; \
do { \
ch = *a; \
*a++ = *b; \
*b++ = ch; \
} while (--cnt); \
}
/*
* Knuth, Vol. 3, page 116, Algorithm Q, step b, argues that a single pass
* of straight insertion sort after partitioning is complete is better than
* sorting each small partition as it is created. This isn't correct in this
* implementation because comparisons require at least one (and often two)
* function calls and are likely to be the dominating expense of the sort.
* Doing a final insertion sort does more comparisons than are necessary
* because it compares the "edges" and medians of the partitions which are
* known to be already sorted.
*
* This is also the reasoning behind selecting a small THRESH value (see
* Knuth, page 122, equation 26), since the quicksort algorithm does less
* comparisons than the insertion sort.
*/
#define SORT(bot, n) { \
if (n > 1) \
if (n == 2) { \
t1 = bot + size; \
if (compar(t1, bot) < 0) \
SWAP(t1, bot); \
} else \
insertion_sort(bot, n, size, compar); \
}
static void
quick_sort(bot, nmemb, size, compar)
register char *bot;
register int size;
int nmemb, (*compar)();
{
register int cnt;
register u_char ch;
register char *top, *mid, *t1, *t2;
register int n1, n2;
char *bsv;
static void insertion_sort();
/* bot and nmemb must already be set. */
partition:
/* find mid and top elements */
mid = bot + size * (nmemb >> 1);
top = bot + (nmemb - 1) * size;
/*
* Find the median of the first, last and middle element (see Knuth,
* Vol. 3, page 123, Eq. 28). This test order gets the equalities
* right.
*/
if (nmemb >= MTHRESH) {
n1 = compar(bot, mid);
n2 = compar(mid, top);
if (n1 < 0 && n2 > 0)
t1 = compar(bot, top) < 0 ? top : bot;
else if (n1 > 0 && n2 < 0)
t1 = compar(bot, top) > 0 ? top : bot;
else
t1 = mid;
/* if mid element not selected, swap selection there */
if (t1 != mid) {
SWAP(t1, mid);
mid -= size;
}
} }
pm = a + (n / 2) * es;
/* Standard quicksort, Knuth, Vol. 3, page 116, Algorithm Q. */ if (n > 7) {
#define didswap n1 pl = a;
#define newbot t1 pn = a + (n - 1) * es;
#define replace t2 if (n > 40) {
didswap = 0; d = (n / 8) * es;
for (bsv = bot;;) { pl = med3(pl, pl + d, pl + 2 * d, cmp);
for (; bot < mid && compar(bot, mid) <= 0; bot += size); pm = med3(pm - d, pm, pm + d, cmp);
while (top > mid) { pn = med3(pn - 2 * d, pn - d, pn, cmp);
if (compar(mid, top) <= 0) {
top -= size;
continue;
}
newbot = bot + size; /* value of bot after swap */
if (bot == mid) /* top <-> mid, mid == top */
replace = mid = top;
else { /* bot <-> top */
replace = top;
top -= size;
}
goto swap;
} }
if (bot == mid) pm = med3(pl, pm, pn, cmp);
}
swap(a, pm);
pa = pb = a + es;
pc = pd = a + (n - 1) * es;
for (;;) {
while (pb <= pc && (r = cmp(pb, a)) <= 0) {
if (r == 0) {
swap_cnt = 1;
swap(pa, pb);
pa += es;
}
pb += es;
}
while (pb <= pc && (r = cmp(pc, a)) >= 0) {
if (r == 0) {
swap_cnt = 1;
swap(pc, pd);
pd -= es;
}
pc -= es;
}
if (pb > pc)
break; break;
swap(pb, pc);
/* bot <-> mid, mid == bot */ swap_cnt = 1;
replace = mid; pb += es;
newbot = mid = bot; /* value of bot after swap */ pc -= es;
top -= size;
swap: SWAP(bot, replace);
bot = newbot;
didswap = 1;
} }
if (swap_cnt == 0) { /* Switch to insertion sort */
/* for (pm = a + es; pm < (char *) a + n * es; pm += es)
* Quicksort behaves badly in the presence of data which is already for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
* sorted (see Knuth, Vol. 3, page 119) going from O N lg N to O N^2. pl -= es)
* To avoid this worst case behavior, if a re-partitioning occurs swap(pl, pl - es);
* without swapping any elements, it is not further partitioned and
* is insert sorted. This wins big with almost sorted data sets and
* only loses if the data set is very strangely partitioned. A fix
* for those data sets would be to return prematurely if the insertion
* sort routine is forced to make an excessive number of swaps, and
* continue the partitioning.
*/
if (!didswap) {
insertion_sort(bsv, nmemb, size, compar);
return; return;
} }
/* pn = a + n * es;
* Re-partition or sort as necessary. Note that the mid element r = min(pa - (char *)a, pb - pa);
* itself is correctly positioned and can be ignored. vecswap(a, pb - r, r);
*/ r = min(pd - pc, pn - pd - es);
#define nlower n1 vecswap(pb, pn - r, r);
#define nupper n2 if ((r = pb - pa) > es)
bot = bsv; qsort(a, r / es, es, cmp);
nlower = (mid - bot) / size; /* size of lower partition */ if ((r = pd - pc) > es) {
mid += size; /* Iterate rather than recurse to save stack space */
nupper = nmemb - nlower - 1; /* size of upper partition */ a = pn - r;
n = r / es;
/* goto loop;
* If must call recursively, do it on the smaller partition; this
* bounds the stack to lg N entries.
*/
if (nlower > nupper) {
if (nupper >= THRESH)
quick_sort(mid, nupper, size, compar);
else {
SORT(mid, nupper);
if (nlower < THRESH) {
SORT(bot, nlower);
return;
}
}
nmemb = nlower;
} else {
if (nlower >= THRESH)
quick_sort(bot, nlower, size, compar);
else {
SORT(bot, nlower);
if (nupper < THRESH) {
SORT(mid, nupper);
return;
}
}
bot = mid;
nmemb = nupper;
}
goto partition;
/* NOTREACHED */
}
static void
insertion_sort(bot, nmemb, size, compar)
char *bot;
register int size;
int nmemb, (*compar)();
{
register int cnt;
register u_char ch;
register char *s1, *s2, *t1, *t2, *top;
/*
* A simple insertion sort (see Knuth, Vol. 3, page 81, Algorithm
* S). Insertion sort has the same worst case as most simple sorts
* (O N^2). It gets used here because it is (O N) in the case of
* sorted data.
*/
top = bot + nmemb * size;
for (t1 = bot + size; t1 < top;) {
for (t2 = t1; (t2 -= size) >= bot && compar(t1, t2) < 0;);
if (t1 != (t2 += size)) {
/* Bubble bytes up through each element. */
for (cnt = size; cnt--; ++t1) {
ch = *t1;
for (s1 = s2 = t1; (s2 -= size) >= t2; s1 = s2)
*s1 = *s2;
*s1 = ch;
}
} else
t1 += size;
} }
/* qsort(pn - r, r / es, es, cmp);*/
} }

View File

@ -1,5 +1,5 @@
.\" Copyright (c) 1990, 1991 The Regents of the University of California. .\" Copyright (c) 1990, 1991, 1993
.\" All rights reserved. .\" The Regents of the University of California. All rights reserved.
.\" .\"
.\" Redistribution and use in source and binary forms, with or without .\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions .\" modification, are permitted provided that the following conditions
@ -29,10 +29,10 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE. .\" SUCH DAMAGE.
.\" .\"
.\" from: @(#)radixsort.3 5.5 (Berkeley) 4/19/91 .\" from: @(#)radixsort.3 8.2 (Berkeley) 1/27/94
.\" $Id: radixsort.3,v 1.2 1993/08/01 07:44:21 mycroft Exp $ .\" $Id: radixsort.3,v 1.3 1994/06/16 05:26:40 mycroft Exp $
.\" .\"
.Dd April 19, 1991 .Dd January 27, 1994
.Dt RADIXSORT 3 .Dt RADIXSORT 3
.Os .Os
.Sh NAME .Sh NAME
@ -42,27 +42,23 @@
.Fd #include <limits.h> .Fd #include <limits.h>
.Fd #include <stdlib.h> .Fd #include <stdlib.h>
.Ft int .Ft int
.Fn radixsort "u_char **base" "int nmemb" "u_char *table" "u_char endbyte" .Fn radixsort "u_char **base" "int nmemb" "u_char *table" "u_int endbyte"
.Ft int
.Fn sradixsort "u_char **base" "int nmemb" "u_char *table" "u_int endbyte"
.Sh DESCRIPTION .Sh DESCRIPTION
The The
.Fn radixsort .Fn radixsort
function and
is a modified radix sort. .Fn sradixsort
functions
are implementations of radix sort.
.Pp .Pp
The These functions sort an array of pointers to byte strings, the initial
.Fn radixsort member of which is referenced by
function sorts an array of
.Fa nmemb
pointers to byte strings, the initial member of which is referenced
by
.Fa base . .Fa base .
The byte strings may contain any values; the end of each string The byte strings may contain any values; the end of each string
is denoted by the user-specified value is denoted by the user-specified value
.Fa endbyte . .Fa endbyte .
The contents of the array are sorted in ascending order according
to the
.Tn ASCII
order of the byte strings they reference.
.Pp .Pp
Applications may specify a sort order by providing the Applications may specify a sort order by providing the
.Fa table .Fa table
@ -74,38 +70,58 @@ must reference an array of
.Dv UCHAR_MAX .Dv UCHAR_MAX
+ 1 bytes which contains the sort + 1 bytes which contains the sort
weight of each possible byte value. weight of each possible byte value.
The end-of-string byte must have a sort weight of 0. The end-of-string byte must have a sort weight of 0 or 255
(for sorting in reverse order).
More than one byte may have the same sort weight. More than one byte may have the same sort weight.
The The
.Fa table .Fa table
argument argument
is useful for applications which wish to sort different characters is useful for applications which wish to sort different characters
equally; for example, providing a table with the same weights equally, for example, providing a table with the same weights
for A-Z as for a-z will result in a case-insensitive sort. for A-Z as for a-z will result in a case-insensitive sort.
If
.Fa table
is NULL, the contents of the array are sorted in ascending order
according to the
.Tn ASCII
order of the byte strings they reference and
.Fa endbyte
has a sorting weight of 0.
.Pp
The
.Fn sradixsort
function is stable, that is, if two elements compare as equal, their
order in the sorted array is unchanged.
The
.Fn sradixsort
function uses additional memory sufficient to hold
.Fa nmemb
pointers.
.Pp .Pp
The The
.Fn radixsort .Fn radixsort
function function is not stable, but uses no additional memory.
is stable, that is, if two elements compare as equal, their order in
the sorted array is unchanged.
.Pp .Pp
The These functions are variants of most-significant-byte radix sorting; in
.Fn radixsort particular, see D.E. Knuth's Algorithm R and section 5.2.5, exercise 10.
function They take linear time relative to the number of bytes in the strings.
is a variant of most-significant-byte radix sorting; in particular, see
D.E. Knuth's Algorithm R and section 5.2.5, exercise 10.
The
.Fn radixsort
function
takes linear time relative to the number of bytes in the strings.
.Sh RETURN VALUES .Sh RETURN VALUES
Upon successful completion 0 is returned. Upon successful completion 0 is returned.
Otherwise, \-1 is returned and the global variable Otherwise, \-1 is returned and the global variable
.Va errno .Va errno
is set to indicate the error. is set to indicate the error.
.Sh ERRORS .Sh ERRORS
The .Bl -tag -width Er
.Fn radixsort .It Bq Er EINVAL
The value of the
.Fa endbyte
element of
.Fa table
is not 0 or 255.
.El
.Pp
Additionally, the
.Fn sradixsort
function function
may fail and set may fail and set
.Va errno .Va errno
@ -131,14 +147,15 @@ for any of the errors specified for the library routine
.%V Vol. 16 .%V Vol. 16
.%N No. 6 .%N No. 6
.Re .Re
.Rs
.%A McIlroy, P.
.%D 1993
.%B "Engineering Radix Sort"
.%T "Computing Systems"
.%V Vol. 6:1
.%P pp. 5-27
.Re
.Sh HISTORY .Sh HISTORY
The The
.Fn radixsort .Fn radixsort
function is function first appeared in 4.4BSD.
.Ud .
.Sh BUGS
The
.Fa nmemb
argument
must be less than the maximum integer,
.Dv INT_MAX .

View File

@ -1,6 +1,9 @@
/*- /*-
* Copyright (c) 1990 The Regents of the University of California. * Copyright (c) 1990, 1993
* All rights reserved. * The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy and by Dan Bernstein at New York University.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions
@ -32,259 +35,285 @@
*/ */
#if defined(LIBC_SCCS) && !defined(lint) #if defined(LIBC_SCCS) && !defined(lint)
/*static char *sccsid = "from: @(#)radixsort.c 5.7 (Berkeley) 2/23/91";*/ /*static char sccsid[] = "from: @(#)radixsort.c 8.1 (Berkeley) 6/4/93";*/
static char *rcsid = "$Id: radixsort.c,v 1.3 1993/08/26 00:48:07 jtc Exp $"; static char *rcsid = "$Id: radixsort.c,v 1.4 1994/06/16 05:26:44 mycroft Exp $";
#endif /* LIBC_SCCS and not lint */ #endif /* LIBC_SCCS and not lint */
/*
* Radixsort routines.
*
* Program r_sort_a() is unstable but uses O(logN) extra memory for a stack.
* Use radixsort(a, n, trace, endchar) for this case.
*
* For stable sorting (using N extra pointers) use sradixsort(), which calls
* r_sort_b().
*
* For a description of this code, see D. McIlroy, P. McIlroy, K. Bostic,
* "Engineering Radix Sort".
*/
#include <sys/types.h> #include <sys/types.h>
#include <limits.h>
#include <stdlib.h> #include <stdlib.h>
#include <stddef.h> #include <stddef.h>
#include <string.h> #include <errno.h>
/* typedef struct {
* __rspartition is the cutoff point for a further partitioning instead const u_char **sa;
* of a shellsort. If it changes check __rsshell_increments. Both of int sn, si;
* these are exported, as the best values are data dependent. } stack;
*/
#define NPARTITION 40
int __rspartition = NPARTITION;
int __rsshell_increments[] = { 4, 1, 0, 0, 0, 0, 0, 0 };
/* static inline void simplesort
* Stackp points to context structures, where each structure schedules a __P((const u_char **, int, int, const u_char *, u_int));
* partitioning. Radixsort exits when the stack is empty. static void r_sort_a __P((const u_char **, int, int, const u_char *, u_int));
* static void r_sort_b __P((const u_char **,
* If the buckets are placed on the stack randomly, the worst case is when const u_char **, int, int, const u_char *, u_int));
* all the buckets but one contain (npartitions + 1) elements and the bucket
* pushed on the stack last contains the rest of the elements. In this case,
* stack growth is bounded by:
*
* limit = (nelements / (npartitions + 1)) - 1;
*
* This is a very large number, 52,377,648 for the maximum 32-bit signed int.
*
* By forcing the largest bucket to be pushed on the stack first, the worst
* case is when all but two buckets each contain (npartitions + 1) elements,
* with the remaining elements split equally between the first and last
* buckets pushed on the stack. In this case, stack growth is bounded when:
*
* for (partition_cnt = 0; nelements > npartitions; ++partition_cnt)
* nelements =
* (nelements - (npartitions + 1) * (nbuckets - 2)) / 2;
* The bound is:
*
* limit = partition_cnt * (nbuckets - 1);
*
* This is a much smaller number, 4590 for the maximum 32-bit signed int.
*/
#define NBUCKETS (UCHAR_MAX + 1)
typedef struct _stack { #define THRESHOLD 20 /* Divert to simplesort(). */
const u_char **bot; #define SIZE 512 /* Default stack size. */
int indx, nmemb;
} CONTEXT;
#define STACKPUSH { \ #define SETUP { \
stackp->bot = p; \ if (tab == NULL) { \
stackp->nmemb = nmemb; \ tr = tr0; \
stackp->indx = indx; \ for (c = 0; c < endch; c++) \
++stackp; \ tr0[c] = c + 1; \
} tr0[c] = 0; \
#define STACKPOP { \ for (c++; c < 256; c++) \
if (stackp == stack) \ tr0[c] = c; \
break; \ endch = 0; \
--stackp; \ } else { \
bot = stackp->bot; \ endch = tab[endch]; \
nmemb = stackp->nmemb; \ tr = tab; \
indx = stackp->indx; \ if (endch != 0 && endch != 255) { \
errno = EINVAL; \
return (-1); \
} \
} \
} }
/*
* A variant of MSD radix sorting; see Knuth Vol. 3, page 177, and 5.2.5,
* Ex. 10 and 12. Also, "Three Partition Refinement Algorithms", Paige
* and Tarjan, SIAM J. Comput. Vol. 16, No. 6, December 1987.
*
* This uses a simple sort as soon as a bucket crosses a cutoff point,
* rather than sorting the entire list after partitioning is finished.
* This should be an advantage.
*
* This is pure MSD instead of LSD of some number of MSD, switching to
* the simple sort as soon as possible. Takes linear time relative to
* the number of bytes in the strings.
*/
int int
#if __STDC__ radixsort(a, n, tab, endch)
radixsort(const u_char **l1, int nmemb, const u_char *tab, u_char endbyte) const u_char **a, *tab;
#else int n;
radixsort(l1, nmemb, tab, endbyte) u_int endch;
const u_char **l1;
register int nmemb;
const u_char *tab;
u_char endbyte;
#endif
{ {
register int i, indx, t1, t2; const u_char *tr;
register const u_char **l2; int c;
register const u_char **p; u_char tr0[256];
register const u_char **bot;
register const u_char *tr;
CONTEXT *stack, *stackp;
int c[NBUCKETS + 1], max;
u_char ltab[NBUCKETS];
static void shellsort();
if (nmemb <= 1) SETUP;
return(0); r_sort_a(a, n, 0, tr, endch);
return (0);
}
/* int
* T1 is the constant part of the equation, the number of elements sradixsort(a, n, tab, endch)
* represented on the stack between the top and bottom entries. const u_char **a, *tab;
* It doesn't get rounded as the divide by 2 rounds down (correct int n;
* for a value being subtracted). T2, the nelem value, has to be u_int endch;
* rounded up before each divide because we want an upper bound; {
* this could overflow if nmemb is the maximum int. const u_char *tr, **ta;
*/ int c;
t1 = ((__rspartition + 1) * (NBUCKETS - 2)) >> 1; u_char tr0[256];
for (i = 0, t2 = nmemb; t2 > __rspartition; i += NBUCKETS - 1)
t2 = ((t2 + 1) >> 1) - t1;
if (i) {
if (!(stack = stackp = (CONTEXT *)malloc(i * sizeof(CONTEXT))))
return(-1);
} else
stack = stackp = NULL;
/* SETUP;
* There are two arrays, one provided by the user (l1), and the if (n < THRESHOLD)
* temporary one (l2). The data is sorted to the temporary stack, simplesort(a, n, 0, tr, endch);
* and then copied back. The speedup of using index to determine
* which stack the data is on and simply swapping stacks back and
* forth, thus avoiding the copy every iteration, turns out to not
* be any faster than the current implementation.
*/
if (!(l2 = (const u_char **)malloc(sizeof(u_char *) * nmemb)))
return(-1);
/*
* Tr references a table of sort weights; multiple entries may
* map to the same weight; EOS char must have the lowest weight.
*/
if (tab)
tr = tab;
else { else {
for (t1 = 0, t2 = endbyte; t1 < t2; ++t1) if ((ta = malloc(n * sizeof(a))) == NULL)
ltab[t1] = t1 + 1; return (-1);
ltab[t2] = 0; r_sort_b(a, ta, n, 0, tr, endch);
for (t1 = endbyte + 1; t1 < NBUCKETS; ++t1) free(ta);
ltab[t1] = t1;
tr = ltab;
} }
return (0);
/* First sort is entire stack */
bot = l1;
indx = 0;
for (;;) {
/* Clear bucket count array */
bzero((char *)c, sizeof(c));
/*
* Compute number of items that sort to the same bucket
* for this index.
*/
for (p = bot, i = nmemb; --i >= 0;)
++c[tr[(*p++)[indx]]];
/*
* Sum the number of characters into c, dividing the temp
* stack into the right number of buckets for this bucket,
* this index. C contains the cumulative total of keys
* before and included in this bucket, and will later be
* used as an index to the bucket. c[NBUCKETS] contains
* the total number of elements, for determining how many
* elements the last bucket contains. At the same time
* find the largest bucket so it gets pushed first.
*/
for (i = max = t1 = 0, t2 = __rspartition; i <= NBUCKETS; ++i) {
if (c[i] > t2) {
t2 = c[i];
max = i;
}
t1 = c[i] += t1;
}
/*
* Partition the elements into buckets; c decrements through
* the bucket, and ends up pointing to the first element of
* the bucket.
*/
for (i = nmemb; --i >= 0;) {
--p;
l2[--c[tr[(*p)[indx]]]] = *p;
}
/* Copy the partitioned elements back to user stack */
bcopy(l2, bot, nmemb * sizeof(u_char *));
++indx;
/*
* Sort buckets as necessary; don't sort c[0], it's the
* EOS character bucket, and nothing can follow EOS.
*/
for (i = max; i; --i) {
if ((nmemb = c[i + 1] - (t1 = c[i])) < 2)
continue;
p = bot + t1;
if (nmemb > __rspartition)
STACKPUSH
else
shellsort(p, indx, nmemb, tr);
}
for (i = max + 1; i < NBUCKETS; ++i) {
if ((nmemb = c[i + 1] - (t1 = c[i])) < 2)
continue;
p = bot + t1;
if (nmemb > __rspartition)
STACKPUSH
else
shellsort(p, indx, nmemb, tr);
}
/* Break out when stack is empty */
STACKPOP
}
free((char *)l2);
free((char *)stack);
return(0);
} }
/* #define empty(s) (s >= sp)
* Shellsort (diminishing increment sort) from Data Structures and #define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si
* Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; #define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i
* see also Knuth Vol. 3, page 84. The increments are selected from #define swap(a, b, t) t = a, a = b, b = t
* formula (8), page 95. Roughly O(N^3/2).
*/ /* Unstable, in-place sort. */
static void void
shellsort(p, indx, nmemb, tr) r_sort_a(a, n, i, tr, endch)
register u_char **p, *tr; const u_char **a;
register int indx, nmemb; int n, i;
const u_char *tr;
u_int endch;
{ {
register u_char ch, *s1, *s2; static int count[256], nc, bmin;
register int incr, *incrp, t1, t2; register int c;
register const u_char **ak, *r;
stack s[SIZE], *sp, *sp0, *sp1, temp;
int *cp, bigc;
const u_char **an, *t, **aj, **top[256];
for (incrp = __rsshell_increments; incr = *incrp++;) /* Set up stack. */
for (t1 = incr; t1 < nmemb; ++t1) sp = s;
for (t2 = t1 - incr; t2 >= 0;) { push(a, n, i);
s1 = p[t2] + indx; while (!empty(s)) {
s2 = p[t2 + incr] + indx; pop(a, n, i);
while ((ch = tr[*s1++]) == tr[*s2] && ch) if (n < THRESHOLD) {
++s2; simplesort(a, n, i, tr, endch);
if (ch > tr[*s2]) { continue;
s1 = p[t2]; }
p[t2] = p[t2 + incr]; an = a + n;
p[t2 + incr] = s1;
t2 -= incr; /* Make character histogram. */
} else if (nc == 0) {
break; bmin = 255; /* First occupied bin, excluding eos. */
for (ak = a; ak < an;) {
c = tr[(*ak++)[i]];
if (++count[c] == 1 && c != endch) {
if (c < bmin)
bmin = c;
nc++;
}
} }
if (sp + nc > s + SIZE) { /* Get more stack. */
r_sort_a(a, n, i, tr, endch);
continue;
}
}
/*
* Set top[]; push incompletely sorted bins onto stack.
* top[] = pointers to last out-of-place element in bins.
* count[] = counts of elements in bins.
* Before permuting: top[c-1] + count[c] = top[c];
* during deal: top[c] counts down to top[c-1].
*/
sp0 = sp1 = sp; /* Stack position of biggest bin. */
bigc = 2; /* Size of biggest bin. */
if (endch == 0) /* Special case: set top[eos]. */
top[0] = ak = a + count[0];
else {
ak = a;
top[255] = an;
}
for (cp = count + bmin; nc > 0; cp++) {
while (*cp == 0) /* Find next non-empty pile. */
cp++;
if (*cp > 1) {
if (*cp > bigc) {
bigc = *cp;
sp1 = sp;
}
push(ak, *cp, i+1);
}
top[cp-count] = ak += *cp;
nc--;
}
swap(*sp0, *sp1, temp); /* Play it safe -- biggest bin last. */
/*
* Permute misplacements home. Already home: everything
* before aj, and in bin[c], items from top[c] on.
* Inner loop:
* r = next element to put in place;
* ak = top[r[i]] = location to put the next element.
* aj = bottom of 1st disordered bin.
* Outer loop:
* Once the 1st disordered bin is done, ie. aj >= ak,
* aj<-aj + count[c] connects the bins in a linked list;
* reset count[c].
*/
for (aj = a; aj < an; *aj = r, aj += count[c], count[c] = 0)
for (r = *aj; aj < (ak = --top[c = tr[r[i]]]);)
swap(*ak, r, t);
}
}
/*
 * r_sort_b --
 *	Stable sort, requiring additional memory.
 *
 *	Sorts the n string pointers in a[] by the bytes found at offset i and
 *	beyond, mapping every byte through the weight table tr[].  ta[] is
 *	scratch space that must hold at least n pointers.
 *
 *	Work is scheduled on a local explicit stack of (array, count, offset)
 *	entries.  Each pass histograms byte i of the current segment, pushes
 *	every bin holding more than one string so it is refined on byte i+1,
 *	and then deals the strings into their bins via ta[].  Segments shorter
 *	than THRESHOLD go to simplesort().
 *
 *	count[], nc and bmin are function-static: a pass leaves count[] zeroed
 *	and nc == 0 when it finishes, and a non-zero nc on entry means "the
 *	histogram is already computed".  That is how the stack-overflow path
 *	hands its histogram to the recursive call.  Because of this shared
 *	state the routine must not be run concurrently.
 *
 *	NOTE(review): the non-zero endch branch hard-codes bin 255 as the
 *	end-of-string bin, so callers are presumably expected to pass endch as
 *	either 0 or 255 -- confirm against the SETUP macro.
 */
void
r_sort_b(a, ta, n, i, tr, endch)
	const u_char **a, **ta;
	int n, i;
	const u_char *tr;
	u_int endch;
{
	static int count[256], nc, bmin;
	register int c;
	register const u_char **ak, **ai;
	/* Sized by SIZE so that it always agrees with the overflow test. */
	stack s[SIZE], *sp, *sp0, *sp1, temp;
	const u_char **top[256];
	int *cp, bigc;

	sp = s;
	push(a, n, i);
	while (!empty(s)) {
		pop(a, n, i);
		if (n < THRESHOLD) {
			simplesort(a, n, i, tr, endch);
			continue;
		}

		if (nc == 0) {
			/*
			 * Make the character histogram, noting the first
			 * occupied bin other than the end-of-string bin.
			 * The test precedes the decrement so that no pointer
			 * below a[0] is ever formed.
			 */
			bmin = 255;
			for (ak = a + n; ak > a;) {
				c = tr[(*--ak)[i]];
				if (++count[c] == 1 && c != endch) {
					if (c < bmin)
						bmin = c;
					nc++;
				}
			}
			if (sp + nc > s + SIZE) {
				/*
				 * Not enough room to push every bin: recurse
				 * to get a fresh stack.  The callee sees
				 * nc != 0 and reuses this histogram.
				 */
				r_sort_b(a, ta, n, i, tr, endch);
				continue;
			}
		}

		/*
		 * Set top[] to the end of each bin and push the bins that
		 * still need sorting.  The end-of-string bin is placed but
		 * never pushed.
		 */
		sp0 = sp1 = sp;		/* Stack position of biggest bin. */
		bigc = 2;		/* Size of biggest bin. */
		if (endch == 0) {
			top[0] = ak = a + count[0];
			count[0] = 0;
		} else {
			ak = a;
			top[255] = a + n;
			count[255] = 0;
		}
		for (cp = count + bmin; nc > 0; cp++) {
			while (*cp == 0)	/* Find next non-empty pile. */
				cp++;
			if ((c = *cp) > 1) {
				if (c > bigc) {
					bigc = c;
					sp1 = sp;
				}
				push(ak, c, i+1);
			}
			top[cp-count] = ak += c;
			*cp = 0;			/* Reset count[]. */
			nc--;
		}
		/* Biggest bin goes lowest on the stack, so it is popped last. */
		swap(*sp0, *sp1, temp);

		for (ak = ta + n, ai = a+n; ak > ta;)	/* Copy to temp. */
			*--ak = *--ai;
		/*
		 * Deal to piles.  Walking ta[] from the back while each
		 * top[] counts down keeps equal keys in their input order.
		 * As above, ak is tested before it is decremented.
		 */
		for (ak = ta + n; ak > ta;) {
			--ak;
			*--top[tr[(*ak)[i]]] = *ak;
		}
	}
}
/*
 * simplesort --
 *	Insertion sort of the n string pointers in a[].  Strings are compared
 *	starting at byte offset b, each byte being mapped through the weight
 *	table tr[]; a byte whose weight equals endch ends the comparison.
 */
static inline void
simplesort(a, n, b, tr, endch)
	register const u_char **a;
	int n, b;
	register const u_char *tr;
	u_int endch;
{
	register u_char w;
	const u_char **slot, *p, *q, *held;
	int k;

	for (k = 1; k < n; k++) {
		/* Sink a[k] toward the front until its predecessor is <= it. */
		for (slot = a + k; slot > a; slot--) {
			p = slot[0] + b;
			q = slot[-1] + b;

			/* Skip the common prefix, stopping at end of string. */
			while ((w = tr[*p]) != endch && w == tr[*q]) {
				p++;
				q++;
			}

			/* Already in order relative to the predecessor. */
			if (w >= tr[*q])
				break;

			held = slot[0];
			slot[0] = slot[-1];
			slot[-1] = held;
		}
	}
}