Add RCS ids.
This commit is contained in:
parent
2c1d50080b
commit
2f86deea8e
|
@ -1,6 +1,9 @@
|
||||||
/*-
|
/*-
|
||||||
* Copyright (c) 1991 The Regents of the University of California.
|
* Copyright (c) 1991, 1993
|
||||||
* All rights reserved.
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Ronnie Kon at Mindcraft Inc., Kevin Lew and Elmer Yglesias.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
@ -32,14 +35,14 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined(LIBC_SCCS) && !defined(lint)
|
#if defined(LIBC_SCCS) && !defined(lint)
|
||||||
/*static char *sccsid = "from: @(#)heapsort.c 5.1 (Berkeley) 6/4/91";*/
|
/*static char sccsid[] = "from: @(#)heapsort.c 8.1 (Berkeley) 6/4/93";*/
|
||||||
static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
|
static char *rcsid = "$Id: heapsort.c,v 1.4 1994/06/16 05:26:34 mycroft Exp $";
|
||||||
#endif /* LIBC_SCCS and not lint */
|
#endif /* LIBC_SCCS and not lint */
|
||||||
|
|
||||||
#include <sys/cdefs.h>
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Swap two areas of size number of bytes. Although qsort(3) permits random
|
* Swap two areas of size number of bytes. Although qsort(3) permits random
|
||||||
|
@ -48,13 +51,23 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
|
||||||
* isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
|
* isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
|
||||||
* arithmetic gets lost in the time required for comparison function calls.
|
* arithmetic gets lost in the time required for comparison function calls.
|
||||||
*/
|
*/
|
||||||
#define SWAP(a, b) { \
|
#define SWAP(a, b, count, size, tmp) { \
|
||||||
cnt = size; \
|
count = size; \
|
||||||
do { \
|
do { \
|
||||||
ch = *a; \
|
tmp = *a; \
|
||||||
*a++ = *b; \
|
*a++ = *b; \
|
||||||
*b++ = ch; \
|
*b++ = tmp; \
|
||||||
} while (--cnt); \
|
} while (--count); \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy one block of size size to another. */
|
||||||
|
#define COPY(a, b, count, size, tmp1, tmp2) { \
|
||||||
|
count = size; \
|
||||||
|
tmp1 = a; \
|
||||||
|
tmp2 = b; \
|
||||||
|
do { \
|
||||||
|
*tmp1++ = *tmp2++; \
|
||||||
|
} while (--count); \
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -63,21 +76,59 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
|
||||||
*
|
*
|
||||||
* There are two cases. If j == nmemb, select largest of Ki and Kj. If
|
* There are two cases. If j == nmemb, select largest of Ki and Kj. If
|
||||||
* j < nmemb, select largest of Ki, Kj and Kj+1.
|
* j < nmemb, select largest of Ki, Kj and Kj+1.
|
||||||
*
|
|
||||||
* The initial value depends on if we're building the initial heap or
|
|
||||||
* reconstructing it after saving a value.
|
|
||||||
*/
|
*/
|
||||||
#define HEAP(initval) { \
|
#define CREATE(initval, nmemb, par_i, child_i, par, child, size, count, tmp) { \
|
||||||
for (i = initval; (j = i * 2) <= nmemb; i = j) { \
|
for (par_i = initval; (child_i = par_i * 2) <= nmemb; \
|
||||||
p = (char *)bot + j * size; \
|
par_i = child_i) { \
|
||||||
if (j < nmemb && compar(p, p + size) < 0) { \
|
child = base + child_i * size; \
|
||||||
p += size; \
|
if (child_i < nmemb && compar(child, child + size) < 0) { \
|
||||||
++j; \
|
child += size; \
|
||||||
|
++child_i; \
|
||||||
} \
|
} \
|
||||||
t = (char *)bot + i * size; \
|
par = base + par_i * size; \
|
||||||
if (compar(p, t) <= 0) \
|
if (compar(child, par) <= 0) \
|
||||||
break; \
|
break; \
|
||||||
SWAP(t, p); \
|
SWAP(par, child, count, size, tmp); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Select the top of the heap and 'heapify'. Since by far the most expensive
|
||||||
|
* action is the call to the compar function, a considerable optimization
|
||||||
|
* in the average case can be achieved due to the fact that k, the displaced
|
||||||
|
* element, is usually quite small, so it would be preferable to first
|
||||||
|
* heapify, always maintaining the invariant that the larger child is copied
|
||||||
|
* over its parent's record.
|
||||||
|
*
|
||||||
|
* Then, starting from the *bottom* of the heap, finding k's correct place,
|
||||||
|
* again maintaining the invariant. As a result of the invariant no element
|
||||||
|
* is 'lost' when k is assigned its correct place in the heap.
|
||||||
|
*
|
||||||
|
* The time savings from this optimization are on the order of 15-20% for the
|
||||||
|
* average case. See Knuth, Vol. 3, page 158, problem 18.
|
||||||
|
*
|
||||||
|
* XXX Don't break the #define SELECT line, below. Reiser cpp gets upset.
|
||||||
|
*/
|
||||||
|
#define SELECT(par_i, child_i, nmemb, par, child, size, k, count, tmp1, tmp2) { \
|
||||||
|
for (par_i = 1; (child_i = par_i * 2) <= nmemb; par_i = child_i) { \
|
||||||
|
child = base + child_i * size; \
|
||||||
|
if (child_i < nmemb && compar(child, child + size) < 0) { \
|
||||||
|
child += size; \
|
||||||
|
++child_i; \
|
||||||
|
} \
|
||||||
|
par = base + par_i * size; \
|
||||||
|
COPY(par, child, count, size, tmp1, tmp2); \
|
||||||
|
} \
|
||||||
|
for (;;) { \
|
||||||
|
child_i = par_i; \
|
||||||
|
par_i = child_i / 2; \
|
||||||
|
child = base + child_i * size; \
|
||||||
|
par = base + par_i * size; \
|
||||||
|
if (child_i == 1 || compar(k, par) < 0) { \
|
||||||
|
COPY(child, k, count, size, tmp1, tmp2); \
|
||||||
|
break; \
|
||||||
|
} \
|
||||||
|
COPY(child, par, count, size, tmp1, tmp2); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,41 +137,49 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
|
||||||
* and worst. While heapsort is faster than the worst case of quicksort,
|
* and worst. While heapsort is faster than the worst case of quicksort,
|
||||||
* the BSD quicksort does median selection so that the chance of finding
|
* the BSD quicksort does median selection so that the chance of finding
|
||||||
* a data set that will trigger the worst case is nonexistent. Heapsort's
|
* a data set that will trigger the worst case is nonexistent. Heapsort's
|
||||||
* only advantage over quicksort is that it requires no additional memory.
|
* only advantage over quicksort is that it requires little additional memory.
|
||||||
*/
|
*/
|
||||||
heapsort(bot, nmemb, size, compar)
|
int
|
||||||
register void *bot;
|
heapsort(vbase, nmemb, size, compar)
|
||||||
register size_t nmemb, size;
|
void *vbase;
|
||||||
|
size_t nmemb, size;
|
||||||
int (*compar) __P((const void *, const void *));
|
int (*compar) __P((const void *, const void *));
|
||||||
{
|
{
|
||||||
register char *p, *t, ch;
|
|
||||||
register int cnt, i, j, l;
|
register int cnt, i, j, l;
|
||||||
|
register char tmp, *tmp1, *tmp2;
|
||||||
|
char *base, *k, *p, *t;
|
||||||
|
|
||||||
if (nmemb <= 1)
|
if (nmemb <= 1)
|
||||||
return (0);
|
return (0);
|
||||||
|
|
||||||
if (!size) {
|
if (!size) {
|
||||||
errno = EINVAL;
|
errno = EINVAL;
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((k = malloc(size)) == NULL)
|
||||||
|
return (-1);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Items are numbered from 1 to nmemb, so offset from size bytes
|
* Items are numbered from 1 to nmemb, so offset from size bytes
|
||||||
* below the starting address.
|
* below the starting address.
|
||||||
*/
|
*/
|
||||||
bot -= size;
|
base = (char *)vbase - size;
|
||||||
|
|
||||||
for (l = nmemb / 2 + 1; --l;)
|
for (l = nmemb / 2 + 1; --l;)
|
||||||
HEAP(l);
|
CREATE(l, nmemb, i, j, t, p, size, cnt, tmp);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For each element of the heap, save the largest element into its
|
* For each element of the heap, save the largest element into its
|
||||||
* final slot, then recreate the heap.
|
* final slot, save the displaced element (k), then recreate the
|
||||||
|
* heap.
|
||||||
*/
|
*/
|
||||||
while (nmemb > 1) {
|
while (nmemb > 1) {
|
||||||
p = (char *)bot + size;
|
COPY(k, base + nmemb * size, cnt, size, tmp1, tmp2);
|
||||||
t = (char *)bot + nmemb * size;
|
COPY(base + nmemb * size, base + size, cnt, size, tmp1, tmp2);
|
||||||
SWAP(p, t);
|
|
||||||
--nmemb;
|
--nmemb;
|
||||||
HEAP(1);
|
SELECT(i, j, nmemb, t, p, size, k, cnt, tmp1, tmp2);
|
||||||
}
|
}
|
||||||
|
free(k);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,7 +35,8 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined(LIBC_SCCS) && !defined(lint)
|
#if defined(LIBC_SCCS) && !defined(lint)
|
||||||
static char sccsid[] = "@(#)merge.c 8.2 (Berkeley) 2/14/94";
|
/*static char sccsid[] = "from: @(#)merge.c 8.2 (Berkeley) 2/14/94";*/
|
||||||
|
static char *rcsid = "$Id: merge.c,v 1.2 1994/06/16 05:26:36 mycroft Exp $";
|
||||||
#endif /* LIBC_SCCS and not lint */
|
#endif /* LIBC_SCCS and not lint */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
.\" Copyright (c) 1990, 1991 The Regents of the University of California.
|
.\" Copyright (c) 1990, 1991, 1993
|
||||||
.\" All rights reserved.
|
.\" The Regents of the University of California. All rights reserved.
|
||||||
.\"
|
.\"
|
||||||
.\" This code is derived from software contributed to Berkeley by
|
.\" This code is derived from software contributed to Berkeley by
|
||||||
.\" the American National Standards Committee X3, on Information
|
.\" the American National Standards Committee X3, on Information
|
||||||
|
@ -33,14 +33,14 @@
|
||||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
.\" SUCH DAMAGE.
|
.\" SUCH DAMAGE.
|
||||||
.\"
|
.\"
|
||||||
.\" from: @(#)qsort.3 6.7 (Berkeley) 6/29/91
|
.\" from: @(#)qsort.3 8.1 (Berkeley) 6/4/93
|
||||||
.\" $Id: qsort.3,v 1.2 1993/08/01 07:44:22 mycroft Exp $
|
.\" $Id: qsort.3,v 1.3 1994/06/16 05:26:38 mycroft Exp $
|
||||||
.\"
|
.\"
|
||||||
.Dd June 29, 1991
|
.Dd June 4, 1993
|
||||||
.Dt QSORT 3
|
.Dt QSORT 3
|
||||||
.Os
|
.Os
|
||||||
.Sh NAME
|
.Sh NAME
|
||||||
.Nm qsort, heapsort
|
.Nm qsort, heapsort, mergesort
|
||||||
.Nd sort functions
|
.Nd sort functions
|
||||||
.Sh SYNOPSIS
|
.Sh SYNOPSIS
|
||||||
.Fd #include <stdlib.h>
|
.Fd #include <stdlib.h>
|
||||||
|
@ -48,6 +48,8 @@
|
||||||
.Fn qsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
|
.Fn qsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
|
||||||
.Ft int
|
.Ft int
|
||||||
.Fn heapsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
|
.Fn heapsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
|
||||||
|
.Ft int
|
||||||
|
.Fn mergesort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
|
||||||
.Sh DESCRIPTION
|
.Sh DESCRIPTION
|
||||||
The
|
The
|
||||||
.Fn qsort
|
.Fn qsort
|
||||||
|
@ -55,6 +57,10 @@ function is a modified partition-exchange sort, or quicksort.
|
||||||
The
|
The
|
||||||
.Fn heapsort
|
.Fn heapsort
|
||||||
function is a modified selection sort.
|
function is a modified selection sort.
|
||||||
|
The
|
||||||
|
.Fn mergesort
|
||||||
|
function is a modified merge sort with exponential search
|
||||||
|
intended for sorting data with pre-existing order.
|
||||||
.Pp
|
.Pp
|
||||||
The
|
The
|
||||||
.Fn qsort
|
.Fn qsort
|
||||||
|
@ -66,11 +72,20 @@ objects, the initial member of which is pointed to by
|
||||||
.Fa base .
|
.Fa base .
|
||||||
The size of each object is specified by
|
The size of each object is specified by
|
||||||
.Fa size .
|
.Fa size .
|
||||||
|
.Fn Mergesort
|
||||||
|
behaves similarly, but
|
||||||
|
.Em requires
|
||||||
|
that
|
||||||
|
.Fa size
|
||||||
|
be greater than
|
||||||
|
.Dq "sizeof(void *) / 2" .
|
||||||
.Pp
|
.Pp
|
||||||
The contents of the array are sorted in ascending order according to
|
The contents of the array
|
||||||
|
.Fa base
|
||||||
|
are sorted in ascending order according to
|
||||||
a comparison function pointed to by
|
a comparison function pointed to by
|
||||||
.Fa compar ,
|
.Fa compar ,
|
||||||
which is called with two arguments that point to the objects being
|
which requires two arguments pointing to the objects being
|
||||||
compared.
|
compared.
|
||||||
.Pp
|
.Pp
|
||||||
The comparison function must return an integer less than, equal to, or
|
The comparison function must return an integer less than, equal to, or
|
||||||
|
@ -85,6 +100,9 @@ are
|
||||||
.Em not
|
.Em not
|
||||||
stable, that is, if two members compare as equal, their order in
|
stable, that is, if two members compare as equal, their order in
|
||||||
the sorted array is undefined.
|
the sorted array is undefined.
|
||||||
|
The function
|
||||||
|
.Fn mergesort
|
||||||
|
is stable.
|
||||||
.Pp
|
.Pp
|
||||||
The
|
The
|
||||||
.Fn qsort
|
.Fn qsort
|
||||||
|
@ -93,7 +111,7 @@ a variant of partition-exchange sorting; in particular, see D.E. Knuth's
|
||||||
Algorithm Q.
|
Algorithm Q.
|
||||||
.Fn Qsort
|
.Fn Qsort
|
||||||
takes O N lg N average time.
|
takes O N lg N average time.
|
||||||
This implementation uses median selection to avoid the traditional
|
This implementation uses median selection to avoid its
|
||||||
O N**2 worst-case behavior.
|
O N**2 worst-case behavior.
|
||||||
.Pp
|
.Pp
|
||||||
The
|
The
|
||||||
|
@ -106,7 +124,28 @@ Its
|
||||||
.Em only
|
.Em only
|
||||||
advantage over
|
advantage over
|
||||||
.Fn qsort
|
.Fn qsort
|
||||||
is that it uses no additional memory.
|
is that it uses almost no additional memory; while
|
||||||
|
.Fn qsort
|
||||||
|
does not allocate memory, it is implemented using recursion.
|
||||||
|
.Pp
|
||||||
|
The function
|
||||||
|
.Fn mergesort
|
||||||
|
requires additional memory of size
|
||||||
|
.Fa nmemb *
|
||||||
|
.Fa size
|
||||||
|
bytes; it should be used only when space is not at a premium.
|
||||||
|
.Fn Mergesort
|
||||||
|
is optimized for data with pre-existing order; its worst case
|
||||||
|
time is O N lg N; its best case is O N.
|
||||||
|
.Pp
|
||||||
|
Normally,
|
||||||
|
.Fn qsort
|
||||||
|
is faster than
|
||||||
|
.Fn mergesort
|
||||||
|
which is faster than
|
||||||
|
.Fn heapsort .
|
||||||
|
Memory availability and pre-existing order in the data can make this
|
||||||
|
untrue.
|
||||||
.Sh RETURN VALUES
|
.Sh RETURN VALUES
|
||||||
The
|
The
|
||||||
.Fn qsort
|
.Fn qsort
|
||||||
|
@ -115,8 +154,10 @@ returns no value.
|
||||||
.Pp
|
.Pp
|
||||||
Upon successful completion,
|
Upon successful completion,
|
||||||
.Fn heapsort
|
.Fn heapsort
|
||||||
returns 0.
|
and
|
||||||
Otherwise, it returns \-1 and the global variable
|
.Fn mergesort
|
||||||
|
return 0.
|
||||||
|
Otherwise, they return \-1 and the global variable
|
||||||
.Va errno
|
.Va errno
|
||||||
is set to indicate the error.
|
is set to indicate the error.
|
||||||
.Sh ERRORS
|
.Sh ERRORS
|
||||||
|
@ -127,11 +168,23 @@ function succeeds unless:
|
||||||
.It Bq Er EINVAL
|
.It Bq Er EINVAL
|
||||||
The
|
The
|
||||||
.Fa size
|
.Fa size
|
||||||
argument is zero.
|
argument is zero, or,
|
||||||
|
the
|
||||||
|
.Fa size
|
||||||
|
argument to
|
||||||
|
.Fn mergesort
|
||||||
|
is less than
|
||||||
|
.Dq "sizeof(void *) / 2" .
|
||||||
|
.It Bq Er ENOMEM
|
||||||
|
.Fn Heapsort
|
||||||
|
or
|
||||||
|
.Fn mergesort
|
||||||
|
was unable to allocate memory.
|
||||||
|
.El
|
||||||
.Sh COMPATIBILITY
|
.Sh COMPATIBILITY
|
||||||
Previous versions of
|
Previous versions of
|
||||||
.Fn qsort
|
.Fn qsort
|
||||||
did not permit the comparison routine to itself call
|
did not permit the comparison routine itself to call
|
||||||
.Fn qsort 3 .
|
.Fn qsort 3 .
|
||||||
This is no longer true.
|
This is no longer true.
|
||||||
.Sh SEE ALSO
|
.Sh SEE ALSO
|
||||||
|
@ -161,6 +214,18 @@ This is no longer true.
|
||||||
.%T "Sorting and Searching"
|
.%T "Sorting and Searching"
|
||||||
.%P pp. 114-123, 145-149
|
.%P pp. 114-123, 145-149
|
||||||
.Re
|
.Re
|
||||||
|
.Rs
|
||||||
|
.%A McIlroy, P.M.
|
||||||
|
.%T "Optimistic Sorting and Information Theoretic Complexity"
|
||||||
|
.%J "Fourth Annual ACM-SIAM Symposium on Discrete Algorithms"
|
||||||
|
.%V January 1992
|
||||||
|
.Re
|
||||||
|
.Rs
|
||||||
|
.%A Bentley, J.L.
|
||||||
|
.%T "Engineering a Sort Function"
|
||||||
|
.%J "bentley@research.att.com"
|
||||||
|
.%V January 1992
|
||||||
|
.Re
|
||||||
.Sh STANDARDS
|
.Sh STANDARDS
|
||||||
The
|
The
|
||||||
.Fn qsort
|
.Fn qsort
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*-
|
/*-
|
||||||
* Copyright (c) 1980, 1983, 1990 The Regents of the University of California.
|
* Copyright (c) 1992, 1993
|
||||||
* All rights reserved.
|
* The Regents of the University of California. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
@ -32,245 +32,144 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined(LIBC_SCCS) && !defined(lint)
|
#if defined(LIBC_SCCS) && !defined(lint)
|
||||||
/*static char *sccsid = "from: @(#)qsort.c 5.9 (Berkeley) 2/23/91";*/
|
/*static char sccsid[] = "from: @(#)qsort.c 8.1 (Berkeley) 6/4/93";*/
|
||||||
static char *rcsid = "$Id: qsort.c,v 1.3 1993/08/26 00:48:06 jtc Exp $";
|
static char *rcsid = "$Id: qsort.c,v 1.4 1994/06/16 05:26:39 mycroft Exp $";
|
||||||
#endif /* LIBC_SCCS and not lint */
|
#endif /* LIBC_SCCS and not lint */
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
/*
|
static inline char *med3 __P((char *, char *, char *, int (*)()));
|
||||||
* MTHRESH is the smallest partition for which we compare for a median
|
static inline void swapfunc __P((char *, char *, int, int));
|
||||||
* value instead of using the middle value.
|
|
||||||
*/
|
/*
 * Smaller of two values.  Both arguments and the whole expansion are
 * parenthesized so the macro composes safely inside larger expressions
 * (e.g. `2 * min(x, y)`).  Each argument may be evaluated twice, so do
 * not pass expressions with side effects.
 */
#define min(a, b)	((a) < (b) ? (a) : (b))
|
||||||
#define MTHRESH 6
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* THRESH is the minimum number of entries in a partition for continued
|
* Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
|
||||||
* partitioning.
|
|
||||||
*/
|
*/
|
||||||
#define THRESH 4
|
#define swapcode(TYPE, parmi, parmj, n) { \
|
||||||
|
long i = (n) / sizeof (TYPE); \
|
||||||
|
register TYPE *pi = (TYPE *) (parmi); \
|
||||||
|
register TYPE *pj = (TYPE *) (parmj); \
|
||||||
|
do { \
|
||||||
|
register TYPE t = *pi; \
|
||||||
|
*pi++ = *pj; \
|
||||||
|
*pj++ = t; \
|
||||||
|
} while (--i > 0); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
|
||||||
|
es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
swapfunc(a, b, n, swaptype)
|
||||||
|
char *a, *b;
|
||||||
|
int n, swaptype;
|
||||||
|
{
|
||||||
|
if(swaptype <= 1)
|
||||||
|
swapcode(long, a, b, n)
|
||||||
|
else
|
||||||
|
swapcode(char, a, b, n)
|
||||||
|
}
|
||||||
|
|
||||||
|
#define swap(a, b) \
|
||||||
|
if (swaptype == 0) { \
|
||||||
|
long t = *(long *)(a); \
|
||||||
|
*(long *)(a) = *(long *)(b); \
|
||||||
|
*(long *)(b) = t; \
|
||||||
|
} else \
|
||||||
|
swapfunc(a, b, es, swaptype)
|
||||||
|
|
||||||
|
/*
 * Exchange two n-byte regions through swapfunc() when n is positive; do
 * nothing otherwise.  The body is wrapped in do { } while (0) so the
 * macro behaves as a single statement and cannot capture an `else` that
 * follows its use.  Expects a `swaptype` variable to be in scope at the
 * point of expansion.
 */
#define vecswap(a, b, n)	do { if ((n) > 0) swapfunc(a, b, n, swaptype); } while (0)
|
||||||
|
|
||||||
|
static inline char *
|
||||||
|
med3(a, b, c, cmp)
|
||||||
|
char *a, *b, *c;
|
||||||
|
int (*cmp)();
|
||||||
|
{
|
||||||
|
return cmp(a, b) < 0 ?
|
||||||
|
(cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a ))
|
||||||
|
:(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c ));
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
qsort(bot, nmemb, size, compar)
|
qsort(a, n, es, cmp)
|
||||||
void *bot;
|
void *a;
|
||||||
size_t nmemb, size;
|
size_t n, es;
|
||||||
int (*compar) __P((const void *, const void *));
|
int (*cmp)();
|
||||||
{
|
{
|
||||||
static void insertion_sort(), quick_sort();
|
char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
|
||||||
|
int d, r, swaptype, swap_cnt;
|
||||||
|
|
||||||
if (nmemb <= 1)
|
loop: SWAPINIT(a, es);
|
||||||
|
swap_cnt = 0;
|
||||||
|
if (n < 7) {
|
||||||
|
for (pm = a + es; pm < (char *) a + n * es; pm += es)
|
||||||
|
for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
|
||||||
|
pl -= es)
|
||||||
|
swap(pl, pl - es);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (nmemb >= THRESH)
|
|
||||||
quick_sort(bot, nmemb, size, compar);
|
|
||||||
else
|
|
||||||
insertion_sort(bot, nmemb, size, compar);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Swap two areas of size number of bytes. Although qsort(3) permits random
|
|
||||||
* blocks of memory to be sorted, sorting pointers is almost certainly the
|
|
||||||
* common case (and, were it not, could easily be made so). Regardless, it
|
|
||||||
* isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
|
|
||||||
* arithmetic gets lost in the time required for comparison function calls.
|
|
||||||
*/
|
|
||||||
#define SWAP(a, b) { \
|
|
||||||
cnt = size; \
|
|
||||||
do { \
|
|
||||||
ch = *a; \
|
|
||||||
*a++ = *b; \
|
|
||||||
*b++ = ch; \
|
|
||||||
} while (--cnt); \
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Knuth, Vol. 3, page 116, Algorithm Q, step b, argues that a single pass
|
|
||||||
* of straight insertion sort after partitioning is complete is better than
|
|
||||||
* sorting each small partition as it is created. This isn't correct in this
|
|
||||||
* implementation because comparisons require at least one (and often two)
|
|
||||||
* function calls and are likely to be the dominating expense of the sort.
|
|
||||||
* Doing a final insertion sort does more comparisons than are necessary
|
|
||||||
* because it compares the "edges" and medians of the partitions which are
|
|
||||||
* known to be already sorted.
|
|
||||||
*
|
|
||||||
* This is also the reasoning behind selecting a small THRESH value (see
|
|
||||||
* Knuth, page 122, equation 26), since the quicksort algorithm does less
|
|
||||||
* comparisons than the insertion sort.
|
|
||||||
*/
|
|
||||||
#define SORT(bot, n) { \
|
|
||||||
if (n > 1) \
|
|
||||||
if (n == 2) { \
|
|
||||||
t1 = bot + size; \
|
|
||||||
if (compar(t1, bot) < 0) \
|
|
||||||
SWAP(t1, bot); \
|
|
||||||
} else \
|
|
||||||
insertion_sort(bot, n, size, compar); \
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
quick_sort(bot, nmemb, size, compar)
|
|
||||||
register char *bot;
|
|
||||||
register int size;
|
|
||||||
int nmemb, (*compar)();
|
|
||||||
{
|
|
||||||
register int cnt;
|
|
||||||
register u_char ch;
|
|
||||||
register char *top, *mid, *t1, *t2;
|
|
||||||
register int n1, n2;
|
|
||||||
char *bsv;
|
|
||||||
static void insertion_sort();
|
|
||||||
|
|
||||||
/* bot and nmemb must already be set. */
|
|
||||||
partition:
|
|
||||||
|
|
||||||
/* find mid and top elements */
|
|
||||||
mid = bot + size * (nmemb >> 1);
|
|
||||||
top = bot + (nmemb - 1) * size;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Find the median of the first, last and middle element (see Knuth,
|
|
||||||
* Vol. 3, page 123, Eq. 28). This test order gets the equalities
|
|
||||||
* right.
|
|
||||||
*/
|
|
||||||
if (nmemb >= MTHRESH) {
|
|
||||||
n1 = compar(bot, mid);
|
|
||||||
n2 = compar(mid, top);
|
|
||||||
if (n1 < 0 && n2 > 0)
|
|
||||||
t1 = compar(bot, top) < 0 ? top : bot;
|
|
||||||
else if (n1 > 0 && n2 < 0)
|
|
||||||
t1 = compar(bot, top) > 0 ? top : bot;
|
|
||||||
else
|
|
||||||
t1 = mid;
|
|
||||||
|
|
||||||
/* if mid element not selected, swap selection there */
|
|
||||||
if (t1 != mid) {
|
|
||||||
SWAP(t1, mid);
|
|
||||||
mid -= size;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
pm = a + (n / 2) * es;
|
||||||
/* Standard quicksort, Knuth, Vol. 3, page 116, Algorithm Q. */
|
if (n > 7) {
|
||||||
#define didswap n1
|
pl = a;
|
||||||
#define newbot t1
|
pn = a + (n - 1) * es;
|
||||||
#define replace t2
|
if (n > 40) {
|
||||||
didswap = 0;
|
d = (n / 8) * es;
|
||||||
for (bsv = bot;;) {
|
pl = med3(pl, pl + d, pl + 2 * d, cmp);
|
||||||
for (; bot < mid && compar(bot, mid) <= 0; bot += size);
|
pm = med3(pm - d, pm, pm + d, cmp);
|
||||||
while (top > mid) {
|
pn = med3(pn - 2 * d, pn - d, pn, cmp);
|
||||||
if (compar(mid, top) <= 0) {
|
|
||||||
top -= size;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
newbot = bot + size; /* value of bot after swap */
|
|
||||||
if (bot == mid) /* top <-> mid, mid == top */
|
|
||||||
replace = mid = top;
|
|
||||||
else { /* bot <-> top */
|
|
||||||
replace = top;
|
|
||||||
top -= size;
|
|
||||||
}
|
|
||||||
goto swap;
|
|
||||||
}
|
}
|
||||||
if (bot == mid)
|
pm = med3(pl, pm, pn, cmp);
|
||||||
|
}
|
||||||
|
swap(a, pm);
|
||||||
|
pa = pb = a + es;
|
||||||
|
|
||||||
|
pc = pd = a + (n - 1) * es;
|
||||||
|
for (;;) {
|
||||||
|
while (pb <= pc && (r = cmp(pb, a)) <= 0) {
|
||||||
|
if (r == 0) {
|
||||||
|
swap_cnt = 1;
|
||||||
|
swap(pa, pb);
|
||||||
|
pa += es;
|
||||||
|
}
|
||||||
|
pb += es;
|
||||||
|
}
|
||||||
|
while (pb <= pc && (r = cmp(pc, a)) >= 0) {
|
||||||
|
if (r == 0) {
|
||||||
|
swap_cnt = 1;
|
||||||
|
swap(pc, pd);
|
||||||
|
pd -= es;
|
||||||
|
}
|
||||||
|
pc -= es;
|
||||||
|
}
|
||||||
|
if (pb > pc)
|
||||||
break;
|
break;
|
||||||
|
swap(pb, pc);
|
||||||
/* bot <-> mid, mid == bot */
|
swap_cnt = 1;
|
||||||
replace = mid;
|
pb += es;
|
||||||
newbot = mid = bot; /* value of bot after swap */
|
pc -= es;
|
||||||
top -= size;
|
|
||||||
|
|
||||||
swap: SWAP(bot, replace);
|
|
||||||
bot = newbot;
|
|
||||||
didswap = 1;
|
|
||||||
}
|
}
|
||||||
|
if (swap_cnt == 0) { /* Switch to insertion sort */
|
||||||
/*
|
for (pm = a + es; pm < (char *) a + n * es; pm += es)
|
||||||
* Quicksort behaves badly in the presence of data which is already
|
for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
|
||||||
* sorted (see Knuth, Vol. 3, page 119) going from O N lg N to O N^2.
|
pl -= es)
|
||||||
* To avoid this worst case behavior, if a re-partitioning occurs
|
swap(pl, pl - es);
|
||||||
* without swapping any elements, it is not further partitioned and
|
|
||||||
* is insert sorted. This wins big with almost sorted data sets and
|
|
||||||
* only loses if the data set is very strangely partitioned. A fix
|
|
||||||
* for those data sets would be to return prematurely if the insertion
|
|
||||||
* sort routine is forced to make an excessive number of swaps, and
|
|
||||||
* continue the partitioning.
|
|
||||||
*/
|
|
||||||
if (!didswap) {
|
|
||||||
insertion_sort(bsv, nmemb, size, compar);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
pn = a + n * es;
|
||||||
* Re-partition or sort as necessary. Note that the mid element
|
r = min(pa - (char *)a, pb - pa);
|
||||||
* itself is correctly positioned and can be ignored.
|
vecswap(a, pb - r, r);
|
||||||
*/
|
r = min(pd - pc, pn - pd - es);
|
||||||
#define nlower n1
|
vecswap(pb, pn - r, r);
|
||||||
#define nupper n2
|
if ((r = pb - pa) > es)
|
||||||
bot = bsv;
|
qsort(a, r / es, es, cmp);
|
||||||
nlower = (mid - bot) / size; /* size of lower partition */
|
if ((r = pd - pc) > es) {
|
||||||
mid += size;
|
/* Iterate rather than recurse to save stack space */
|
||||||
nupper = nmemb - nlower - 1; /* size of upper partition */
|
a = pn - r;
|
||||||
|
n = r / es;
|
||||||
/*
|
goto loop;
|
||||||
* If must call recursively, do it on the smaller partition; this
|
|
||||||
* bounds the stack to lg N entries.
|
|
||||||
*/
|
|
||||||
if (nlower > nupper) {
|
|
||||||
if (nupper >= THRESH)
|
|
||||||
quick_sort(mid, nupper, size, compar);
|
|
||||||
else {
|
|
||||||
SORT(mid, nupper);
|
|
||||||
if (nlower < THRESH) {
|
|
||||||
SORT(bot, nlower);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
nmemb = nlower;
|
|
||||||
} else {
|
|
||||||
if (nlower >= THRESH)
|
|
||||||
quick_sort(bot, nlower, size, compar);
|
|
||||||
else {
|
|
||||||
SORT(bot, nlower);
|
|
||||||
if (nupper < THRESH) {
|
|
||||||
SORT(mid, nupper);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
bot = mid;
|
|
||||||
nmemb = nupper;
|
|
||||||
}
|
|
||||||
goto partition;
|
|
||||||
/* NOTREACHED */
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
insertion_sort(bot, nmemb, size, compar)
|
|
||||||
char *bot;
|
|
||||||
register int size;
|
|
||||||
int nmemb, (*compar)();
|
|
||||||
{
|
|
||||||
register int cnt;
|
|
||||||
register u_char ch;
|
|
||||||
register char *s1, *s2, *t1, *t2, *top;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* A simple insertion sort (see Knuth, Vol. 3, page 81, Algorithm
|
|
||||||
* S). Insertion sort has the same worst case as most simple sorts
|
|
||||||
* (O N^2). It gets used here because it is (O N) in the case of
|
|
||||||
* sorted data.
|
|
||||||
*/
|
|
||||||
top = bot + nmemb * size;
|
|
||||||
for (t1 = bot + size; t1 < top;) {
|
|
||||||
for (t2 = t1; (t2 -= size) >= bot && compar(t1, t2) < 0;);
|
|
||||||
if (t1 != (t2 += size)) {
|
|
||||||
/* Bubble bytes up through each element. */
|
|
||||||
for (cnt = size; cnt--; ++t1) {
|
|
||||||
ch = *t1;
|
|
||||||
for (s1 = s2 = t1; (s2 -= size) >= t2; s1 = s2)
|
|
||||||
*s1 = *s2;
|
|
||||||
*s1 = ch;
|
|
||||||
}
|
|
||||||
} else
|
|
||||||
t1 += size;
|
|
||||||
}
|
}
|
||||||
|
/* qsort(pn - r, r / es, es, cmp);*/
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
.\" Copyright (c) 1990, 1991 The Regents of the University of California.
|
.\" Copyright (c) 1990, 1991, 1993
|
||||||
.\" All rights reserved.
|
.\" The Regents of the University of California. All rights reserved.
|
||||||
.\"
|
.\"
|
||||||
.\" Redistribution and use in source and binary forms, with or without
|
.\" Redistribution and use in source and binary forms, with or without
|
||||||
.\" modification, are permitted provided that the following conditions
|
.\" modification, are permitted provided that the following conditions
|
||||||
|
@ -29,10 +29,10 @@
|
||||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
.\" SUCH DAMAGE.
|
.\" SUCH DAMAGE.
|
||||||
.\"
|
.\"
|
||||||
.\" from: @(#)radixsort.3 5.5 (Berkeley) 4/19/91
|
.\" from: @(#)radixsort.3 8.2 (Berkeley) 1/27/94
|
||||||
.\" $Id: radixsort.3,v 1.2 1993/08/01 07:44:21 mycroft Exp $
|
.\" $Id: radixsort.3,v 1.3 1994/06/16 05:26:40 mycroft Exp $
|
||||||
.\"
|
.\"
|
||||||
.Dd April 19, 1991
|
.Dd January 27, 1994
|
||||||
.Dt RADIXSORT 3
|
.Dt RADIXSORT 3
|
||||||
.Os
|
.Os
|
||||||
.Sh NAME
|
.Sh NAME
|
||||||
|
@ -42,27 +42,23 @@
|
||||||
.Fd #include <limits.h>
|
.Fd #include <limits.h>
|
||||||
.Fd #include <stdlib.h>
|
.Fd #include <stdlib.h>
|
||||||
.Ft int
|
.Ft int
|
||||||
.Fn radixsort "u_char **base" "int nmemb" "u_char *table" "u_char endbyte"
|
.Fn radixsort "u_char **base" "int nmemb" "u_char *table" "u_int endbyte"
|
||||||
|
.Ft int
|
||||||
|
.Fn sradixsort "u_char **base" "int nmemb" "u_char *table" "u_int endbyte"
|
||||||
.Sh DESCRIPTION
|
.Sh DESCRIPTION
|
||||||
The
|
The
|
||||||
.Fn radixsort
|
.Fn radixsort
|
||||||
function
|
and
|
||||||
is a modified radix sort.
|
.Fn sradixsort
|
||||||
|
functions
|
||||||
|
are implementations of radix sort.
|
||||||
.Pp
|
.Pp
|
||||||
The
|
These functions sort an array of pointers to byte strings, the initial
|
||||||
.Fn radixsort
|
member of which is referenced by
|
||||||
function sorts an array of
|
|
||||||
.Fa nmemb
|
|
||||||
pointers to byte strings, the initial member of which is referenced
|
|
||||||
by
|
|
||||||
.Fa base .
|
.Fa base .
|
||||||
The byte strings may contain any values; the end of each string
|
The byte strings may contain any values; the end of each string
|
||||||
is denoted by the user-specified value
|
is denoted by the user-specified value
|
||||||
.Fa endbyte .
|
.Fa endbyte .
|
||||||
The contents of the array are sorted in ascending order according
|
|
||||||
to the
|
|
||||||
.Tn ASCII
|
|
||||||
order of the byte strings they reference.
|
|
||||||
.Pp
|
.Pp
|
||||||
Applications may specify a sort order by providing the
|
Applications may specify a sort order by providing the
|
||||||
.Fa table
|
.Fa table
|
||||||
|
@ -74,38 +70,58 @@ must reference an array of
|
||||||
.Dv UCHAR_MAX
|
.Dv UCHAR_MAX
|
||||||
+ 1 bytes which contains the sort
|
+ 1 bytes which contains the sort
|
||||||
weight of each possible byte value.
|
weight of each possible byte value.
|
||||||
The end-of-string byte must have a sort weight of 0.
|
The end-of-string byte must have a sort weight of 0 or 255
|
||||||
|
(for sorting in reverse order).
|
||||||
More than one byte may have the same sort weight.
|
More than one byte may have the same sort weight.
|
||||||
The
|
The
|
||||||
.Fa table
|
.Fa table
|
||||||
argument
|
argument
|
||||||
is useful for applications which wish to sort different characters
|
is useful for applications which wish to sort different characters
|
||||||
equally; for example, providing a table with the same weights
|
equally; for example, providing a table with the same weights
|
||||||
for A-Z as for a-z will result in a case-insensitive sort.
|
for A-Z as for a-z will result in a case-insensitive sort.
|
||||||
|
If
|
||||||
|
.Fa table
|
||||||
|
is NULL, the contents of the array are sorted in ascending order
|
||||||
|
according to the
|
||||||
|
.Tn ASCII
|
||||||
|
order of the byte strings they reference and
|
||||||
|
.Fa endbyte
|
||||||
|
has a sorting weight of 0.
|
||||||
|
.Pp
|
||||||
|
The
|
||||||
|
.Fn sradixsort
|
||||||
|
function is stable, that is, if two elements compare as equal, their
|
||||||
|
order in the sorted array is unchanged.
|
||||||
|
The
|
||||||
|
.Fn sradixsort
|
||||||
|
function uses additional memory sufficient to hold
|
||||||
|
.Fa nmemb
|
||||||
|
pointers.
|
||||||
.Pp
|
.Pp
|
||||||
The
|
The
|
||||||
.Fn radixsort
|
.Fn radixsort
|
||||||
function
|
function is not stable, but uses no additional memory.
|
||||||
is stable, that is, if two elements compare as equal, their order in
|
|
||||||
the sorted array is unchanged.
|
|
||||||
.Pp
|
.Pp
|
||||||
The
|
These functions are variants of most-significant-byte radix sorting; in
|
||||||
.Fn radixsort
|
particular, see D.E. Knuth's Algorithm R and section 5.2.5, exercise 10.
|
||||||
function
|
They take linear time relative to the number of bytes in the strings.
|
||||||
is a variant of most-significant-byte radix sorting; in particular, see
|
|
||||||
D.E. Knuth's Algorithm R and section 5.2.5, exercise 10.
|
|
||||||
The
|
|
||||||
.Fn radixsort
|
|
||||||
function
|
|
||||||
takes linear time relative to the number of bytes in the strings.
|
|
||||||
.Sh RETURN VALUES
|
.Sh RETURN VALUES
|
||||||
Upon successful completion 0 is returned.
|
Upon successful completion 0 is returned.
|
||||||
Otherwise, \-1 is returned and the global variable
|
Otherwise, \-1 is returned and the global variable
|
||||||
.Va errno
|
.Va errno
|
||||||
is set to indicate the error.
|
is set to indicate the error.
|
||||||
.Sh ERRORS
|
.Sh ERRORS
|
||||||
The
|
.Bl -tag -width Er
|
||||||
.Fn radixsort
|
.It Bq Er EINVAL
|
||||||
|
The value of the
|
||||||
|
.Fa endbyte
|
||||||
|
element of
|
||||||
|
.Fa table
|
||||||
|
is not 0 or 255.
|
||||||
|
.El
|
||||||
|
.Pp
|
||||||
|
Additionally, the
|
||||||
|
.Fn sradixsort
|
||||||
function
|
function
|
||||||
may fail and set
|
may fail and set
|
||||||
.Va errno
|
.Va errno
|
||||||
|
@ -131,14 +147,15 @@ for any of the errors specified for the library routine
|
||||||
.%V Vol. 16
|
.%V Vol. 16
|
||||||
.%N No. 6
|
.%N No. 6
|
||||||
.Re
|
.Re
|
||||||
|
.Rs
|
||||||
|
.%A McIlroy, P.
|
||||||
|
.%D 1993
|
||||||
|
.%T "Engineering Radix Sort"
|
||||||
|
.%J "Computing Systems"
|
||||||
|
.%V Vol. 6:1
|
||||||
|
.%P pp. 5-27
|
||||||
|
.Re
|
||||||
.Sh HISTORY
|
.Sh HISTORY
|
||||||
The
|
The
|
||||||
.Fn radixsort
|
.Fn radixsort
|
||||||
function is
|
function first appeared in 4.4BSD.
|
||||||
.Ud .
|
|
||||||
.Sh BUGS
|
|
||||||
The
|
|
||||||
.Fa nmemb
|
|
||||||
argument
|
|
||||||
must be less than the maximum integer,
|
|
||||||
.Dv INT_MAX .
|
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
/*-
|
/*-
|
||||||
* Copyright (c) 1990 The Regents of the University of California.
|
* Copyright (c) 1990, 1993
|
||||||
* All rights reserved.
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Peter McIlroy and by Dan Bernstein at New York University,
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
@ -32,259 +35,285 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined(LIBC_SCCS) && !defined(lint)
|
#if defined(LIBC_SCCS) && !defined(lint)
|
||||||
/*static char *sccsid = "from: @(#)radixsort.c 5.7 (Berkeley) 2/23/91";*/
|
/*static char sccsid[] = "from: @(#)radixsort.c 8.1 (Berkeley) 6/4/93";*/
|
||||||
static char *rcsid = "$Id: radixsort.c,v 1.3 1993/08/26 00:48:07 jtc Exp $";
|
static char *rcsid = "$Id: radixsort.c,v 1.4 1994/06/16 05:26:44 mycroft Exp $";
|
||||||
#endif /* LIBC_SCCS and not lint */
|
#endif /* LIBC_SCCS and not lint */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Radixsort routines.
|
||||||
|
*
|
||||||
|
* Program r_sort_a() is unstable but uses O(logN) extra memory for a stack.
|
||||||
|
* Use radixsort(a, n, tab, endch) for this case.
|
||||||
|
*
|
||||||
|
* For stable sorting (using N extra pointers) use sradixsort(), which calls
|
||||||
|
* r_sort_b().
|
||||||
|
*
|
||||||
|
* For a description of this code, see D. McIlroy, P. McIlroy, K. Bostic,
|
||||||
|
* "Engineering Radix Sort".
|
||||||
|
*/
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <limits.h>
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <string.h>
|
#include <errno.h>
|
||||||
|
|
||||||
/*
|
typedef struct {
|
||||||
* __rspartition is the cutoff point for a further partitioning instead
|
const u_char **sa;
|
||||||
* of a shellsort. If it changes check __rsshell_increments. Both of
|
int sn, si;
|
||||||
* these are exported, as the best values are data dependent.
|
} stack;
|
||||||
*/
|
|
||||||
#define NPARTITION 40
|
|
||||||
int __rspartition = NPARTITION;
|
|
||||||
int __rsshell_increments[] = { 4, 1, 0, 0, 0, 0, 0, 0 };
|
|
||||||
|
|
||||||
/*
|
static inline void simplesort
|
||||||
* Stackp points to context structures, where each structure schedules a
|
__P((const u_char **, int, int, const u_char *, u_int));
|
||||||
* partitioning. Radixsort exits when the stack is empty.
|
static void r_sort_a __P((const u_char **, int, int, const u_char *, u_int));
|
||||||
*
|
static void r_sort_b __P((const u_char **,
|
||||||
* If the buckets are placed on the stack randomly, the worst case is when
|
const u_char **, int, int, const u_char *, u_int));
|
||||||
* all the buckets but one contain (npartitions + 1) elements and the bucket
|
|
||||||
* pushed on the stack last contains the rest of the elements. In this case,
|
|
||||||
* stack growth is bounded by:
|
|
||||||
*
|
|
||||||
* limit = (nelements / (npartitions + 1)) - 1;
|
|
||||||
*
|
|
||||||
* This is a very large number, 52,377,648 for the maximum 32-bit signed int.
|
|
||||||
*
|
|
||||||
* By forcing the largest bucket to be pushed on the stack first, the worst
|
|
||||||
* case is when all but two buckets each contain (npartitions + 1) elements,
|
|
||||||
* with the remaining elements split equally between the first and last
|
|
||||||
* buckets pushed on the stack. In this case, stack growth is bounded when:
|
|
||||||
*
|
|
||||||
* for (partition_cnt = 0; nelements > npartitions; ++partition_cnt)
|
|
||||||
* nelements =
|
|
||||||
* (nelements - (npartitions + 1) * (nbuckets - 2)) / 2;
|
|
||||||
* The bound is:
|
|
||||||
*
|
|
||||||
* limit = partition_cnt * (nbuckets - 1);
|
|
||||||
*
|
|
||||||
* This is a much smaller number, 4590 for the maximum 32-bit signed int.
|
|
||||||
*/
|
|
||||||
#define NBUCKETS (UCHAR_MAX + 1)
|
|
||||||
|
|
||||||
typedef struct _stack {
|
#define THRESHOLD 20 /* Divert to simplesort(). */
|
||||||
const u_char **bot;
|
#define SIZE 512 /* Default stack size. */
|
||||||
int indx, nmemb;
|
|
||||||
} CONTEXT;
|
|
||||||
|
|
||||||
#define STACKPUSH { \
|
#define SETUP { \
|
||||||
stackp->bot = p; \
|
if (tab == NULL) { \
|
||||||
stackp->nmemb = nmemb; \
|
tr = tr0; \
|
||||||
stackp->indx = indx; \
|
for (c = 0; c < endch; c++) \
|
||||||
++stackp; \
|
tr0[c] = c + 1; \
|
||||||
}
|
tr0[c] = 0; \
|
||||||
#define STACKPOP { \
|
for (c++; c < 256; c++) \
|
||||||
if (stackp == stack) \
|
tr0[c] = c; \
|
||||||
break; \
|
endch = 0; \
|
||||||
--stackp; \
|
} else { \
|
||||||
bot = stackp->bot; \
|
endch = tab[endch]; \
|
||||||
nmemb = stackp->nmemb; \
|
tr = tab; \
|
||||||
indx = stackp->indx; \
|
if (endch != 0 && endch != 255) { \
|
||||||
|
errno = EINVAL; \
|
||||||
|
return (-1); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* A variant of MSD radix sorting; see Knuth Vol. 3, page 177, and 5.2.5,
|
|
||||||
* Ex. 10 and 12. Also, "Three Partition Refinement Algorithms, Paige
|
|
||||||
* and Tarjan, SIAM J. Comput. Vol. 16, No. 6, December 1987.
|
|
||||||
*
|
|
||||||
* This uses a simple sort as soon as a bucket crosses a cutoff point,
|
|
||||||
* rather than sorting the entire list after partitioning is finished.
|
|
||||||
* This should be an advantage.
|
|
||||||
*
|
|
||||||
* This is pure MSD instead of LSD of some number of MSD, switching to
|
|
||||||
* the simple sort as soon as possible. Takes linear time relative to
|
|
||||||
* the number of bytes in the strings.
|
|
||||||
*/
|
|
||||||
int
|
int
|
||||||
#if __STDC__
|
radixsort(a, n, tab, endch)
|
||||||
radixsort(const u_char **l1, int nmemb, const u_char *tab, u_char endbyte)
|
const u_char **a, *tab;
|
||||||
#else
|
int n;
|
||||||
radixsort(l1, nmemb, tab, endbyte)
|
u_int endch;
|
||||||
const u_char **l1;
|
|
||||||
register int nmemb;
|
|
||||||
const u_char *tab;
|
|
||||||
u_char endbyte;
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
register int i, indx, t1, t2;
|
const u_char *tr;
|
||||||
register const u_char **l2;
|
int c;
|
||||||
register const u_char **p;
|
u_char tr0[256];
|
||||||
register const u_char **bot;
|
|
||||||
register const u_char *tr;
|
|
||||||
CONTEXT *stack, *stackp;
|
|
||||||
int c[NBUCKETS + 1], max;
|
|
||||||
u_char ltab[NBUCKETS];
|
|
||||||
static void shellsort();
|
|
||||||
|
|
||||||
if (nmemb <= 1)
|
SETUP;
|
||||||
return(0);
|
r_sort_a(a, n, 0, tr, endch);
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
int
|
||||||
* T1 is the constant part of the equation, the number of elements
|
sradixsort(a, n, tab, endch)
|
||||||
* represented on the stack between the top and bottom entries.
|
const u_char **a, *tab;
|
||||||
* It doesn't get rounded as the divide by 2 rounds down (correct
|
int n;
|
||||||
* for a value being subtracted). T2, the nelem value, has to be
|
u_int endch;
|
||||||
* rounded up before each divide because we want an upper bound;
|
{
|
||||||
* this could overflow if nmemb is the maximum int.
|
const u_char *tr, **ta;
|
||||||
*/
|
int c;
|
||||||
t1 = ((__rspartition + 1) * (NBUCKETS - 2)) >> 1;
|
u_char tr0[256];
|
||||||
for (i = 0, t2 = nmemb; t2 > __rspartition; i += NBUCKETS - 1)
|
|
||||||
t2 = ((t2 + 1) >> 1) - t1;
|
|
||||||
if (i) {
|
|
||||||
if (!(stack = stackp = (CONTEXT *)malloc(i * sizeof(CONTEXT))))
|
|
||||||
return(-1);
|
|
||||||
} else
|
|
||||||
stack = stackp = NULL;
|
|
||||||
|
|
||||||
/*
|
SETUP;
|
||||||
* There are two arrays, one provided by the user (l1), and the
|
if (n < THRESHOLD)
|
||||||
* temporary one (l2). The data is sorted to the temporary stack,
|
simplesort(a, n, 0, tr, endch);
|
||||||
* and then copied back. The speedup of using index to determine
|
|
||||||
* which stack the data is on and simply swapping stacks back and
|
|
||||||
* forth, thus avoiding the copy every iteration, turns out to not
|
|
||||||
* be any faster than the current implementation.
|
|
||||||
*/
|
|
||||||
if (!(l2 = (const u_char **)malloc(sizeof(u_char *) * nmemb)))
|
|
||||||
return(-1);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Tr references a table of sort weights; multiple entries may
|
|
||||||
* map to the same weight; EOS char must have the lowest weight.
|
|
||||||
*/
|
|
||||||
if (tab)
|
|
||||||
tr = tab;
|
|
||||||
else {
|
else {
|
||||||
for (t1 = 0, t2 = endbyte; t1 < t2; ++t1)
|
if ((ta = malloc(n * sizeof(a))) == NULL)
|
||||||
ltab[t1] = t1 + 1;
|
return (-1);
|
||||||
ltab[t2] = 0;
|
r_sort_b(a, ta, n, 0, tr, endch);
|
||||||
for (t1 = endbyte + 1; t1 < NBUCKETS; ++t1)
|
free(ta);
|
||||||
ltab[t1] = t1;
|
|
||||||
tr = ltab;
|
|
||||||
}
|
}
|
||||||
|
return (0);
|
||||||
/* First sort is entire stack */
|
|
||||||
bot = l1;
|
|
||||||
indx = 0;
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
/* Clear bucket count array */
|
|
||||||
bzero((char *)c, sizeof(c));
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Compute number of items that sort to the same bucket
|
|
||||||
* for this index.
|
|
||||||
*/
|
|
||||||
for (p = bot, i = nmemb; --i >= 0;)
|
|
||||||
++c[tr[(*p++)[indx]]];
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Sum the number of characters into c, dividing the temp
|
|
||||||
* stack into the right number of buckets for this bucket,
|
|
||||||
* this index. C contains the cumulative total of keys
|
|
||||||
* before and included in this bucket, and will later be
|
|
||||||
* used as an index to the bucket. c[NBUCKETS] contains
|
|
||||||
* the total number of elements, for determining how many
|
|
||||||
* elements the last bucket contains. At the same time
|
|
||||||
* find the largest bucket so it gets pushed first.
|
|
||||||
*/
|
|
||||||
for (i = max = t1 = 0, t2 = __rspartition; i <= NBUCKETS; ++i) {
|
|
||||||
if (c[i] > t2) {
|
|
||||||
t2 = c[i];
|
|
||||||
max = i;
|
|
||||||
}
|
|
||||||
t1 = c[i] += t1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Partition the elements into buckets; c decrements through
|
|
||||||
* the bucket, and ends up pointing to the first element of
|
|
||||||
* the bucket.
|
|
||||||
*/
|
|
||||||
for (i = nmemb; --i >= 0;) {
|
|
||||||
--p;
|
|
||||||
l2[--c[tr[(*p)[indx]]]] = *p;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy the partitioned elements back to user stack */
|
|
||||||
bcopy(l2, bot, nmemb * sizeof(u_char *));
|
|
||||||
|
|
||||||
++indx;
|
|
||||||
/*
|
|
||||||
* Sort buckets as necessary; don't sort c[0], it's the
|
|
||||||
* EOS character bucket, and nothing can follow EOS.
|
|
||||||
*/
|
|
||||||
for (i = max; i; --i) {
|
|
||||||
if ((nmemb = c[i + 1] - (t1 = c[i])) < 2)
|
|
||||||
continue;
|
|
||||||
p = bot + t1;
|
|
||||||
if (nmemb > __rspartition)
|
|
||||||
STACKPUSH
|
|
||||||
else
|
|
||||||
shellsort(p, indx, nmemb, tr);
|
|
||||||
}
|
|
||||||
for (i = max + 1; i < NBUCKETS; ++i) {
|
|
||||||
if ((nmemb = c[i + 1] - (t1 = c[i])) < 2)
|
|
||||||
continue;
|
|
||||||
p = bot + t1;
|
|
||||||
if (nmemb > __rspartition)
|
|
||||||
STACKPUSH
|
|
||||||
else
|
|
||||||
shellsort(p, indx, nmemb, tr);
|
|
||||||
}
|
|
||||||
/* Break out when stack is empty */
|
|
||||||
STACKPOP
|
|
||||||
}
|
|
||||||
|
|
||||||
free((char *)l2);
|
|
||||||
free((char *)stack);
|
|
||||||
return(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
#define empty(s) (s >= sp)
|
||||||
* Shellsort (diminishing increment sort) from Data Structures and
|
#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si
|
||||||
* Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290;
|
#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i
|
||||||
* see also Knuth Vol. 3, page 84. The increments are selected from
|
#define swap(a, b, t) t = a, a = b, b = t
|
||||||
* formula (8), page 95. Roughly O(N^3/2).
|
|
||||||
*/
|
/* Unstable, in-place sort. */
|
||||||
static void
|
void
|
||||||
shellsort(p, indx, nmemb, tr)
|
r_sort_a(a, n, i, tr, endch)
|
||||||
register u_char **p, *tr;
|
const u_char **a;
|
||||||
register int indx, nmemb;
|
int n, i;
|
||||||
|
const u_char *tr;
|
||||||
|
u_int endch;
|
||||||
{
|
{
|
||||||
register u_char ch, *s1, *s2;
|
static int count[256], nc, bmin;
|
||||||
register int incr, *incrp, t1, t2;
|
register int c;
|
||||||
|
register const u_char **ak, *r;
|
||||||
|
stack s[SIZE], *sp, *sp0, *sp1, temp;
|
||||||
|
int *cp, bigc;
|
||||||
|
const u_char **an, *t, **aj, **top[256];
|
||||||
|
|
||||||
for (incrp = __rsshell_increments; incr = *incrp++;)
|
/* Set up stack. */
|
||||||
for (t1 = incr; t1 < nmemb; ++t1)
|
sp = s;
|
||||||
for (t2 = t1 - incr; t2 >= 0;) {
|
push(a, n, i);
|
||||||
s1 = p[t2] + indx;
|
while (!empty(s)) {
|
||||||
s2 = p[t2 + incr] + indx;
|
pop(a, n, i);
|
||||||
while ((ch = tr[*s1++]) == tr[*s2] && ch)
|
if (n < THRESHOLD) {
|
||||||
++s2;
|
simplesort(a, n, i, tr, endch);
|
||||||
if (ch > tr[*s2]) {
|
continue;
|
||||||
s1 = p[t2];
|
}
|
||||||
p[t2] = p[t2 + incr];
|
an = a + n;
|
||||||
p[t2 + incr] = s1;
|
|
||||||
t2 -= incr;
|
/* Make character histogram. */
|
||||||
} else
|
if (nc == 0) {
|
||||||
break;
|
bmin = 255; /* First occupied bin, excluding eos. */
|
||||||
|
for (ak = a; ak < an;) {
|
||||||
|
c = tr[(*ak++)[i]];
|
||||||
|
if (++count[c] == 1 && c != endch) {
|
||||||
|
if (c < bmin)
|
||||||
|
bmin = c;
|
||||||
|
nc++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if (sp + nc > s + SIZE) { /* Get more stack. */
|
||||||
|
r_sort_a(a, n, i, tr, endch);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set top[]; push incompletely sorted bins onto stack.
|
||||||
|
* top[] = pointers to last out-of-place element in bins.
|
||||||
|
* count[] = counts of elements in bins.
|
||||||
|
* Before permuting: top[c-1] + count[c] = top[c];
|
||||||
|
* during deal: top[c] counts down to top[c-1].
|
||||||
|
*/
|
||||||
|
sp0 = sp1 = sp; /* Stack position of biggest bin. */
|
||||||
|
bigc = 2; /* Size of biggest bin. */
|
||||||
|
if (endch == 0) /* Special case: set top[eos]. */
|
||||||
|
top[0] = ak = a + count[0];
|
||||||
|
else {
|
||||||
|
ak = a;
|
||||||
|
top[255] = an;
|
||||||
|
}
|
||||||
|
for (cp = count + bmin; nc > 0; cp++) {
|
||||||
|
while (*cp == 0) /* Find next non-empty pile. */
|
||||||
|
cp++;
|
||||||
|
if (*cp > 1) {
|
||||||
|
if (*cp > bigc) {
|
||||||
|
bigc = *cp;
|
||||||
|
sp1 = sp;
|
||||||
|
}
|
||||||
|
push(ak, *cp, i+1);
|
||||||
|
}
|
||||||
|
top[cp-count] = ak += *cp;
|
||||||
|
nc--;
|
||||||
|
}
|
||||||
|
swap(*sp0, *sp1, temp); /* Play it safe -- biggest bin last. */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Permute misplacements home. Already home: everything
|
||||||
|
* before aj, and in bin[c], items from top[c] on.
|
||||||
|
* Inner loop:
|
||||||
|
* r = next element to put in place;
|
||||||
|
* ak = top[r[i]] = location to put the next element.
|
||||||
|
* aj = bottom of 1st disordered bin.
|
||||||
|
* Outer loop:
|
||||||
|
* Once the 1st disordered bin is done, ie. aj >= ak,
|
||||||
|
* aj<-aj + count[c] connects the bins in a linked list;
|
||||||
|
* reset count[c].
|
||||||
|
*/
|
||||||
|
for (aj = a; aj < an; *aj = r, aj += count[c], count[c] = 0)
|
||||||
|
for (r = *aj; aj < (ak = --top[c = tr[r[i]]]);)
|
||||||
|
swap(*ak, r, t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Stable sort, requiring additional memory. */
|
||||||
|
void
|
||||||
|
r_sort_b(a, ta, n, i, tr, endch)
|
||||||
|
const u_char **a, **ta;
|
||||||
|
int n, i;
|
||||||
|
const u_char *tr;
|
||||||
|
u_int endch;
|
||||||
|
{
|
||||||
|
static int count[256], nc, bmin;
|
||||||
|
register int c;
|
||||||
|
register const u_char **ak, **ai;
|
||||||
|
stack s[512], *sp, *sp0, *sp1, temp;
|
||||||
|
const u_char **top[256];
|
||||||
|
int *cp, bigc;
|
||||||
|
|
||||||
|
sp = s;
|
||||||
|
push(a, n, i);
|
||||||
|
while (!empty(s)) {
|
||||||
|
pop(a, n, i);
|
||||||
|
if (n < THRESHOLD) {
|
||||||
|
simplesort(a, n, i, tr, endch);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nc == 0) {
|
||||||
|
bmin = 255;
|
||||||
|
for (ak = a + n; --ak >= a;) {
|
||||||
|
c = tr[(*ak)[i]];
|
||||||
|
if (++count[c] == 1 && c != endch) {
|
||||||
|
if (c < bmin)
|
||||||
|
bmin = c;
|
||||||
|
nc++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (sp + nc > s + SIZE) {
|
||||||
|
r_sort_b(a, ta, n, i, tr, endch);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sp0 = sp1 = sp;
|
||||||
|
bigc = 2;
|
||||||
|
if (endch == 0) {
|
||||||
|
top[0] = ak = a + count[0];
|
||||||
|
count[0] = 0;
|
||||||
|
} else {
|
||||||
|
ak = a;
|
||||||
|
top[255] = a + n;
|
||||||
|
count[255] = 0;
|
||||||
|
}
|
||||||
|
for (cp = count + bmin; nc > 0; cp++) {
|
||||||
|
while (*cp == 0)
|
||||||
|
cp++;
|
||||||
|
if ((c = *cp) > 1) {
|
||||||
|
if (c > bigc) {
|
||||||
|
bigc = c;
|
||||||
|
sp1 = sp;
|
||||||
|
}
|
||||||
|
push(ak, c, i+1);
|
||||||
|
}
|
||||||
|
top[cp-count] = ak += c;
|
||||||
|
*cp = 0; /* Reset count[]. */
|
||||||
|
nc--;
|
||||||
|
}
|
||||||
|
swap(*sp0, *sp1, temp);
|
||||||
|
|
||||||
|
for (ak = ta + n, ai = a+n; ak > ta;) /* Copy to temp. */
|
||||||
|
*--ak = *--ai;
|
||||||
|
for (ak = ta+n; --ak >= ta;) /* Deal to piles. */
|
||||||
|
*--top[tr[(*ak)[i]]] = *ak;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
simplesort(a, n, b, tr, endch) /* insertion sort */
|
||||||
|
register const u_char **a;
|
||||||
|
int n, b;
|
||||||
|
register const u_char *tr;
|
||||||
|
u_int endch;
|
||||||
|
{
|
||||||
|
register u_char ch;
|
||||||
|
const u_char **ak, **ai, *s, *t;
|
||||||
|
|
||||||
|
for (ak = a+1; --n >= 1; ak++)
|
||||||
|
for (ai = ak; ai > a; ai--) {
|
||||||
|
for (s = ai[0] + b, t = ai[-1] + b;
|
||||||
|
(ch = tr[*s]) != endch; s++, t++)
|
||||||
|
if (ch != tr[*t])
|
||||||
|
break;
|
||||||
|
if (ch >= tr[*t])
|
||||||
|
break;
|
||||||
|
swap(ai[0], ai[-1], s);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue