Add RCS ids.

1994-06-16 05:26:34 +00:00 · 1994-06-16 05:26:34 +00:00 · 2f86deea8e
parent 2c1d50080b
commit 2f86deea8e
6 changed files with 617 additions and 547 deletions
--- a/lib/libc/stdlib/heapsort.c
+++ b/lib/libc/stdlib/heapsort.c
@ -1,6 +1,9 @@
 /*-
- * Copyright (c) 1991 The Regents of the University of California.
- * All rights reserved.
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Ronnie Kon at Mindcraft Inc., Kevin Lew and Elmer Yglesias.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
@ -32,14 +35,14 @@
 */

 #if defined(LIBC_SCCS) && !defined(lint)
-/*static char *sccsid = "from: @(#)heapsort.c	5.1 (Berkeley) 6/4/91";*/
-static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
+/*static char sccsid[] = "from: @(#)heapsort.c	8.1 (Berkeley) 6/4/93";*/
+static char *rcsid = "$Id: heapsort.c,v 1.4 1994/06/16 05:26:34 mycroft Exp $";
 #endif /* LIBC_SCCS and not lint */

-#include <sys/cdefs.h>
 #include <sys/types.h>
 #include <errno.h>
 #include <stdlib.h>
+#include <stddef.h>

 /*
 * Swap two areas of size number of bytes.  Although qsort(3) permits random
@ -48,13 +51,23 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
 * isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
 * arithmetic gets lost in the time required for comparison function calls.
 */
-#define	SWAP(a, b) { \
-	cnt = size; \
+#define	SWAP(a, b, count, size, tmp) { \
+	count = size; \
 	do { \
-		ch = *a; \
+		tmp = *a; \
 		*a++ = *b; \
-		*b++ = ch; \
-	} while (--cnt); \
+		*b++ = tmp; \
+	} while (--count); \
+}
+
+/* Copy one block of size size to another. */
+#define COPY(a, b, count, size, tmp1, tmp2) { \
+	count = size; \
+	tmp1 = a; \
+	tmp2 = b; \
+	do { \
+		*tmp1++ = *tmp2++; \
+	} while (--count); \
 }

 /*
@ -63,21 +76,59 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
 *
 * There two cases.  If j == nmemb, select largest of Ki and Kj.  If
 * j < nmemb, select largest of Ki, Kj and Kj+1.
- *
- * The initial value depends on if we're building the initial heap or
- * reconstructing it after saving a value.
 */
-#define	HEAP(initval) { \
-	for (i = initval; (j = i * 2) <= nmemb; i = j) { \
-		p = (char *)bot + j * size; \
-		if (j < nmemb && compar(p, p + size) < 0) { \
-			p += size; \
-			++j; \
+#define CREATE(initval, nmemb, par_i, child_i, par, child, size, count, tmp) { \
+	for (par_i = initval; (child_i = par_i * 2) <= nmemb; \
+	    par_i = child_i) { \
+		child = base + child_i * size; \
+		if (child_i < nmemb && compar(child, child + size) < 0) { \
+			child += size; \
+			++child_i; \
 		} \
-		t = (char *)bot + i * size; \
-		if (compar(p, t) <= 0) \
+		par = base + par_i * size; \
+		if (compar(child, par) <= 0) \
 			break; \
-		SWAP(t, p); \
+		SWAP(par, child, count, size, tmp); \
+	} \
+}
+
+/*
+ * Select the top of the heap and 'heapify'.  Since by far the most expensive
+ * action is the call to the compar function, a considerable optimization
+ * in the average case can be achieved due to the fact that k, the displaced
+ * element, is usually quite small, so it would be preferable to first
+ * heapify, always maintaining the invariant that the larger child is copied
+ * over its parent's record.
+ *
+ * Then, starting from the *bottom* of the heap, finding k's correct place,
+ * again maintaining the invariant.  As a result of the invariant no element
+ * is 'lost' when k is assigned its correct place in the heap.
+ *
+ * The time savings from this optimization are on the order of 15-20% for the
+ * average case. See Knuth, Vol. 3, page 158, problem 18.
+ *
+ * XXX Don't break the #define SELECT line, below.  Reiser cpp gets upset.
+ */
+#define SELECT(par_i, child_i, nmemb, par, child, size, k, count, tmp1, tmp2) { \
+	for (par_i = 1; (child_i = par_i * 2) <= nmemb; par_i = child_i) { \
+		child = base + child_i * size; \
+		if (child_i < nmemb && compar(child, child + size) < 0) { \
+			child += size; \
+			++child_i; \
+		} \
+		par = base + par_i * size; \
+		COPY(par, child, count, size, tmp1, tmp2); \
+	} \
+	for (;;) { \
+		child_i = par_i; \
+		par_i = child_i / 2; \
+		child = base + child_i * size; \
+		par = base + par_i * size; \
+		if (child_i == 1 || compar(k, par) < 0) { \
+			COPY(child, k, count, size, tmp1, tmp2); \
+			break; \
+		} \
+		COPY(child, par, count, size, tmp1, tmp2); \
 	} \
 }

@ -86,41 +137,49 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
 * and worst.  While heapsort is faster than the worst case of quicksort,
 * the BSD quicksort does median selection so that the chance of finding
 * a data set that will trigger the worst case is nonexistent.  Heapsort's
- * only advantage over quicksort is that it requires no additional memory.
+ * only advantage over quicksort is that it requires little additional memory.
 */
-heapsort(bot, nmemb, size, compar)
-	register void *bot;
-	register size_t nmemb, size;
+int
+heapsort(vbase, nmemb, size, compar)
+	void *vbase;
+	size_t nmemb, size;
 	int (*compar) __P((const void *, const void *));
 {
-	register char *p, *t, ch;
 	register int cnt, i, j, l;
+	register char tmp, *tmp1, *tmp2;
+	char *base, *k, *p, *t;

 	if (nmemb <= 1)
 		return (0);
+
 	if (!size) {
 		errno = EINVAL;
 		return (-1);
 	}
+
+	if ((k = malloc(size)) == NULL)
+		return (-1);
+
 	/*
 	 * Items are numbered from 1 to nmemb, so offset from size bytes
 	 * below the starting address.
 	 */
-	bot -= size;
+	base = (char *)vbase - size;

 	for (l = nmemb / 2 + 1; --l;)
-		HEAP(l);
+		CREATE(l, nmemb, i, j, t, p, size, cnt, tmp);

 	/*
 	 * For each element of the heap, save the largest element into its
-	 * final slot, then recreate the heap.
+	 * final slot, save the displaced element (k), then recreate the
+	 * heap.
 	 */
 	while (nmemb > 1) {
-		p = (char *)bot + size;
-		t = (char *)bot + nmemb * size;
-		SWAP(p, t);
+		COPY(k, base + nmemb * size, cnt, size, tmp1, tmp2);
+		COPY(base + nmemb * size, base + size, cnt, size, tmp1, tmp2);
 		--nmemb;
-		HEAP(1);
+		SELECT(i, j, nmemb, t, p, size, k, cnt, tmp1, tmp2);
 	}
+	free(k);
 	return (0);
 }
--- a/lib/libc/stdlib/merge.c
+++ b/lib/libc/stdlib/merge.c
@ -35,7 +35,8 @@
 */

 #if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)merge.c	8.2 (Berkeley) 2/14/94";
+/*static char sccsid[] = "from: @(#)merge.c	8.2 (Berkeley) 2/14/94";*/
+static char *rcsid = "$Id: merge.c,v 1.2 1994/06/16 05:26:36 mycroft Exp $";
 #endif /* LIBC_SCCS and not lint */

 /*
--- a/lib/libc/stdlib/qsort.3
+++ b/lib/libc/stdlib/qsort.3
@ -1,5 +1,5 @@
-.\" Copyright (c) 1990, 1991 The Regents of the University of California.
-.\" All rights reserved.
+.\" Copyright (c) 1990, 1991, 1993
+.\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" This code is derived from software contributed to Berkeley by
 .\" the American National Standards Committee X3, on Information
@ -33,14 +33,14 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.\"     from: @(#)qsort.3	6.7 (Berkeley) 6/29/91
-.\"	$Id: qsort.3,v 1.2 1993/08/01 07:44:22 mycroft Exp $
+.\"     from: @(#)qsort.3	8.1 (Berkeley) 6/4/93
+.\"	$Id: qsort.3,v 1.3 1994/06/16 05:26:38 mycroft Exp $
 .\"
-.Dd June 29, 1991
+.Dd June 4, 1993
 .Dt QSORT 3
 .Os
 .Sh NAME
-.Nm qsort, heapsort
+.Nm qsort, heapsort, mergesort
 .Nd sort functions
 .Sh SYNOPSIS
 .Fd #include <stdlib.h>
@ -48,6 +48,8 @@
 .Fn qsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
 .Ft int
 .Fn heapsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
+.Ft int
+.Fn mergesort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
 .Sh DESCRIPTION
 The
 .Fn qsort
@ -55,6 +57,10 @@ function is a modified partition-exchange sort, or quicksort.
 The
 .Fn heapsort
 function is a modified selection sort.
+The
+.Fn mergesort
+function is a modified merge sort with exponential search
+intended for sorting data with pre-existing order.
 .Pp
 The
 .Fn qsort
@ -66,11 +72,20 @@ objects, the initial member of which is pointed to by
 .Fa base .
 The size of each object is specified by
 .Fa size .
+.Fn Mergesort
+behaves similarly, but
+.Em requires
+that
+.Fa size
+be greater than
+.Dq "sizeof(void *) / 2" .
 .Pp
-The contents of the array are sorted in ascending order according to
+The contents of the array
+.Fa base
+are sorted in ascending order according to
 a comparison function pointed to by
 .Fa compar ,
-which is called with two arguments that point to the objects being
+which requires two arguments pointing to the objects being
 compared.
 .Pp
 The comparison function must return an integer less than, equal to, or
@ -85,6 +100,9 @@ are
 .Em not
 stable, that is, if two members compare as equal, their order in
 the sorted array is undefined.
+The function
+.Fn mergesort
+is stable.
 .Pp
 The
 .Fn qsort
@ -93,7 +111,7 @@ a variant of partition-exchange sorting; in particular, see D.E. Knuth's
 Algorithm Q.
 .Fn Qsort
 takes O N lg N average time.
-This implementation uses median selection to avoid the traditional
+This implementation uses median selection to avoid its
 O N**2 worst-case behavior.
 .Pp
 The
@ -106,7 +124,28 @@ Its
 .Em only
 advantage over
 .Fn qsort
-is that it uses no additional memory.
+is that it uses almost no additional memory; while
+.Fn qsort
+does not allocate memory, it is implemented using recursion.
+.Pp
+The function
+.Fn mergesort
+requires additional memory of size
+.Fa nmemb *
+.Fa size 
+bytes; it should be used only when space is not at a premium.
+.Fn Mergesort
+is optimized for data with pre-existing order; its worst case
+time is O N lg N; its best case is O N.
+.Pp
+Normally,
+.Fn qsort
+is faster than
+.Fn mergesort ,
+which is faster than
+.Fn heapsort .
+Memory availability and pre-existing order in the data can make this
+untrue.
 .Sh RETURN VALUES
 The
 .Fn qsort
@ -115,8 +154,10 @@ returns no value.
 .Pp
 Upon successful completion,
 .Fn heapsort
-returns 0.
-Otherwise, it returns \-1 and the global variable
+and
+.Fn mergesort
+return 0.
+Otherwise, they return \-1 and the global variable
 .Va errno
 is set to indicate the error.
 .Sh ERRORS
@ -127,11 +168,23 @@ function succeeds unless:
 .It Bq Er EINVAL
 The
 .Fa size
-argument is zero.
+argument is zero, or,
+the
+.Fa size
+argument to
+.Fn mergesort
+is less than
+.Dq "sizeof(void *) / 2" .
+.It Bq Er ENOMEM
+.Fn Heapsort
+or
+.Fn mergesort
+was unable to allocate memory.
+.El
 .Sh COMPATIBILITY
 Previous versions of
 .Fn qsort
-did not permit the comparison routine to itself call
+did not permit the comparison routine itself to call
 .Fn qsort 3 .
 This is no longer true.
 .Sh SEE ALSO
@ -161,6 +214,18 @@ This is no longer true.
 .%T "Sorting and Searching"
 .%P pp. 114-123, 145-149
 .Re
+.Rs
+.%A McIlroy, P.M.
+.%T "Optimistic Sorting and Information Theoretic Complexity"
+.%J "Fourth Annual ACM-SIAM Symposium on Discrete Algorithms"
+.%V January 1992
+.Re
+.Rs
+.%A Bentley, J.L.
+.%T "Engineering a Sort Function"
+.%J "bentley@research.att.com"
+.%V January 1992
+.Re
 .Sh STANDARDS
 The
 .Fn qsort
--- a/lib/libc/stdlib/qsort.c
+++ b/lib/libc/stdlib/qsort.c
@ -1,6 +1,6 @@
 /*-
- * Copyright (c) 1980, 1983, 1990 The Regents of the University of California.
- * All rights reserved.
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
@ -32,245 +32,144 @@
 */

 #if defined(LIBC_SCCS) && !defined(lint)
-/*static char *sccsid = "from: @(#)qsort.c	5.9 (Berkeley) 2/23/91";*/
-static char *rcsid = "$Id: qsort.c,v 1.3 1993/08/26 00:48:06 jtc Exp $";
+/*static char sccsid[] = "from: @(#)qsort.c	8.1 (Berkeley) 6/4/93";*/
+static char *rcsid = "$Id: qsort.c,v 1.4 1994/06/16 05:26:39 mycroft Exp $";
 #endif /* LIBC_SCCS and not lint */

 #include <sys/types.h>
 #include <stdlib.h>

-/*
- * MTHRESH is the smallest partition for which we compare for a median
- * value instead of using the middle value.
- */
-#define	MTHRESH	6
+static inline char	*med3 __P((char *, char *, char *, int (*)()));
+static inline void	 swapfunc __P((char *, char *, int, int));
+
+#define min(a, b)	(a) < (b) ? a : b

 /*
- * THRESH is the minimum number of entries in a partition for continued
- * partitioning.
+ * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
 */
-#define	THRESH	4
+#define swapcode(TYPE, parmi, parmj, n) { 		\
+	long i = (n) / sizeof (TYPE); 			\
+	register TYPE *pi = (TYPE *) (parmi); 		\
+	register TYPE *pj = (TYPE *) (parmj); 		\
+	do { 						\
+		register TYPE	t = *pi;		\
+		*pi++ = *pj;				\
+		*pj++ = t;				\
+        } while (--i > 0);				\
+}
+
+#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
+	es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
+
+static inline void
+swapfunc(a, b, n, swaptype)
+	char *a, *b;
+	int n, swaptype;
+{
+	if(swaptype <= 1) 
+		swapcode(long, a, b, n)
+	else
+		swapcode(char, a, b, n)
+}
+
+#define swap(a, b)					\
+	if (swaptype == 0) {				\
+		long t = *(long *)(a);			\
+		*(long *)(a) = *(long *)(b);		\
+		*(long *)(b) = t;			\
+	} else						\
+		swapfunc(a, b, es, swaptype)
+
+#define vecswap(a, b, n) 	if ((n) > 0) swapfunc(a, b, n, swaptype)
+
+static inline char *
+med3(a, b, c, cmp)
+	char *a, *b, *c;
+	int (*cmp)();
+{
+	return cmp(a, b) < 0 ?
+	       (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a ))
+              :(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c ));
+}

 void
-qsort(bot, nmemb, size, compar)
-	void *bot;
-	size_t nmemb, size;
-	int (*compar) __P((const void *, const void *));
+qsort(a, n, es, cmp)
+	void *a;
+	size_t n, es;
+	int (*cmp)();
 {
-	static void insertion_sort(), quick_sort();
+	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+	int d, r, swaptype, swap_cnt;

-	if (nmemb <= 1)
+loop:	SWAPINIT(a, es);
+	swap_cnt = 0;
+	if (n < 7) {
+		for (pm = a + es; pm < (char *) a + n * es; pm += es)
+			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
+			     pl -= es)
+				swap(pl, pl - es);
 		return;
-
-	if (nmemb >= THRESH)
-		quick_sort(bot, nmemb, size, compar);
-	else
-		insertion_sort(bot, nmemb, size, compar);
-}
-
-/*
- * Swap two areas of size number of bytes.  Although qsort(3) permits random
- * blocks of memory to be sorted, sorting pointers is almost certainly the
- * common case (and, were it not, could easily be made so).  Regardless, it
- * isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
- * arithmetic gets lost in the time required for comparison function calls.
- */
-#define	SWAP(a, b) { \
-	cnt = size; \
-	do { \
-		ch = *a; \
-		*a++ = *b; \
-		*b++ = ch; \
-	} while (--cnt); \
-}
-
-/*
- * Knuth, Vol. 3, page 116, Algorithm Q, step b, argues that a single pass
- * of straight insertion sort after partitioning is complete is better than
- * sorting each small partition as it is created.  This isn't correct in this
- * implementation because comparisons require at least one (and often two)
- * function calls and are likely to be the dominating expense of the sort.
- * Doing a final insertion sort does more comparisons than are necessary
- * because it compares the "edges" and medians of the partitions which are
- * known to be already sorted.
- *
- * This is also the reasoning behind selecting a small THRESH value (see
- * Knuth, page 122, equation 26), since the quicksort algorithm does less
- * comparisons than the insertion sort.
- */
-#define	SORT(bot, n) { \
-	if (n > 1) \
-		if (n == 2) { \
-			t1 = bot + size; \
-			if (compar(t1, bot) < 0) \
-				SWAP(t1, bot); \
-		} else \
-			insertion_sort(bot, n, size, compar); \
-}
-
-static void
-quick_sort(bot, nmemb, size, compar)
-	register char *bot;
-	register int size;
-	int nmemb, (*compar)();
-{
-	register int cnt;
-	register u_char ch;
-	register char *top, *mid, *t1, *t2;
-	register int n1, n2;
-	char *bsv;
-	static void insertion_sort();
-
-	/* bot and nmemb must already be set. */
-partition:
-
-	/* find mid and top elements */
-	mid = bot + size * (nmemb >> 1);
-	top = bot + (nmemb - 1) * size;
-
-	/*
-	 * Find the median of the first, last and middle element (see Knuth,
-	 * Vol. 3, page 123, Eq. 28).  This test order gets the equalities
-	 * right.
-	 */
-	if (nmemb >= MTHRESH) {
-		n1 = compar(bot, mid);
-		n2 = compar(mid, top);
-		if (n1 < 0 && n2 > 0)
-			t1 = compar(bot, top) < 0 ? top : bot;
-		else if (n1 > 0 && n2 < 0)
-			t1 = compar(bot, top) > 0 ? top : bot;
-		else
-			t1 = mid;
-
-		/* if mid element not selected, swap selection there */
-		if (t1 != mid) {
-			SWAP(t1, mid);
-			mid -= size;
-		}
 	}
-
-	/* Standard quicksort, Knuth, Vol. 3, page 116, Algorithm Q. */
-#define	didswap	n1
-#define	newbot	t1
-#define	replace	t2
-	didswap = 0;
-	for (bsv = bot;;) {
-		for (; bot < mid && compar(bot, mid) <= 0; bot += size);
-		while (top > mid) {
-			if (compar(mid, top) <= 0) {
-				top -= size;
-				continue;
-			}
-			newbot = bot + size;	/* value of bot after swap */
-			if (bot == mid)		/* top <-> mid, mid == top */
-				replace = mid = top;
-			else {			/* bot <-> top */
-				replace = top;
-				top -= size;
-			}
-			goto swap;
+	pm = a + (n / 2) * es;
+	if (n > 7) {
+		pl = a;
+		pn = a + (n - 1) * es;
+		if (n > 40) {
+			d = (n / 8) * es;
+			pl = med3(pl, pl + d, pl + 2 * d, cmp);
+			pm = med3(pm - d, pm, pm + d, cmp);
+			pn = med3(pn - 2 * d, pn - d, pn, cmp);
 		}
-		if (bot == mid)
+		pm = med3(pl, pm, pn, cmp);
+	}
+	swap(a, pm);
+	pa = pb = a + es;
+
+	pc = pd = a + (n - 1) * es;
+	for (;;) {
+		while (pb <= pc && (r = cmp(pb, a)) <= 0) {
+			if (r == 0) {
+				swap_cnt = 1;
+				swap(pa, pb);
+				pa += es;
+			}
+			pb += es;
+		}
+		while (pb <= pc && (r = cmp(pc, a)) >= 0) {
+			if (r == 0) {
+				swap_cnt = 1;
+				swap(pc, pd);
+				pd -= es;
+			}
+			pc -= es;
+		}
+		if (pb > pc)
 			break;
-
-		/* bot <-> mid, mid == bot */
-		replace = mid;
-		newbot = mid = bot;		/* value of bot after swap */
-		top -= size;
-
-swap:		SWAP(bot, replace);
-		bot = newbot;
-		didswap = 1;
+		swap(pb, pc);
+		swap_cnt = 1;
+		pb += es;
+		pc -= es;
 	}
-
-	/*
-	 * Quicksort behaves badly in the presence of data which is already
-	 * sorted (see Knuth, Vol. 3, page 119) going from O N lg N to O N^2.
-	 * To avoid this worst case behavior, if a re-partitioning occurs
-	 * without swapping any elements, it is not further partitioned and
-	 * is insert sorted.  This wins big with almost sorted data sets and
-	 * only loses if the data set is very strangely partitioned.  A fix
-	 * for those data sets would be to return prematurely if the insertion
-	 * sort routine is forced to make an excessive number of swaps, and
-	 * continue the partitioning.
-	 */
-	if (!didswap) {
-		insertion_sort(bsv, nmemb, size, compar);
+	if (swap_cnt == 0) {  /* Switch to insertion sort */
+		for (pm = a + es; pm < (char *) a + n * es; pm += es)
+			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0; 
+			     pl -= es)
+				swap(pl, pl - es);
 		return;
 	}

-	/*
-	 * Re-partition or sort as necessary.  Note that the mid element
-	 * itself is correctly positioned and can be ignored.
-	 */
-#define	nlower	n1
-#define	nupper	n2
-	bot = bsv;
-	nlower = (mid - bot) / size;	/* size of lower partition */
-	mid += size;
-	nupper = nmemb - nlower - 1;	/* size of upper partition */
-
-	/*
-	 * If must call recursively, do it on the smaller partition; this
-	 * bounds the stack to lg N entries.
-	 */
-	if (nlower > nupper) {
-		if (nupper >= THRESH)
-			quick_sort(mid, nupper, size, compar);
-		else {
-			SORT(mid, nupper);
-			if (nlower < THRESH) {
-				SORT(bot, nlower);
-				return;
-			}
-		}
-		nmemb = nlower;
-	} else {
-		if (nlower >= THRESH)
-			quick_sort(bot, nlower, size, compar);
-		else {
-			SORT(bot, nlower);
-			if (nupper < THRESH) {
-				SORT(mid, nupper);
-				return;
-			}
-		}
-		bot = mid;
-		nmemb = nupper;
-	}
-	goto partition;
-	/* NOTREACHED */
-}
-
-static void
-insertion_sort(bot, nmemb, size, compar)
-	char *bot;
-	register int size;
-	int nmemb, (*compar)();
-{
-	register int cnt;
-	register u_char ch;
-	register char *s1, *s2, *t1, *t2, *top;
-
-	/*
-	 * A simple insertion sort (see Knuth, Vol. 3, page 81, Algorithm
-	 * S).  Insertion sort has the same worst case as most simple sorts
-	 * (O N^2).  It gets used here because it is (O N) in the case of
-	 * sorted data.
-	 */
-	top = bot + nmemb * size;
-	for (t1 = bot + size; t1 < top;) {
-		for (t2 = t1; (t2 -= size) >= bot && compar(t1, t2) < 0;);
-		if (t1 != (t2 += size)) {
-			/* Bubble bytes up through each element. */
-			for (cnt = size; cnt--; ++t1) {
-				ch = *t1;
-				for (s1 = s2 = t1; (s2 -= size) >= t2; s1 = s2)
-					*s1 = *s2;
-				*s1 = ch;
-			}
-		} else
-			t1 += size;
+	pn = a + n * es;
+	r = min(pa - (char *)a, pb - pa);
+	vecswap(a, pb - r, r);
+	r = min(pd - pc, pn - pd - es);
+	vecswap(pb, pn - r, r);
+	if ((r = pb - pa) > es)
+		qsort(a, r / es, es, cmp);
+	if ((r = pd - pc) > es) { 
+		/* Iterate rather than recurse to save stack space */
+		a = pn - r;
+		n = r / es;
+		goto loop;
 	}
+/*		qsort(pn - r, r / es, es, cmp);*/
 }
--- a/lib/libc/stdlib/radixsort.3
+++ b/lib/libc/stdlib/radixsort.3
@ -1,5 +1,5 @@
-.\" Copyright (c) 1990, 1991 The Regents of the University of California.
-.\" All rights reserved.
+.\" Copyright (c) 1990, 1991, 1993
+.\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
@ -29,10 +29,10 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.\"     from: @(#)radixsort.3	5.5 (Berkeley) 4/19/91
-.\"	$Id: radixsort.3,v 1.2 1993/08/01 07:44:21 mycroft Exp $
+.\"     from: @(#)radixsort.3	8.2 (Berkeley) 1/27/94
+.\"	$Id: radixsort.3,v 1.3 1994/06/16 05:26:40 mycroft Exp $
 .\"
-.Dd April 19, 1991
+.Dd January 27, 1994
 .Dt RADIXSORT 3
 .Os
 .Sh NAME
@ -42,27 +42,23 @@
 .Fd #include <limits.h>
 .Fd #include <stdlib.h>
 .Ft int
-.Fn radixsort "u_char **base" "int nmemb" "u_char *table" "u_char endbyte"
+.Fn radixsort "u_char **base" "int nmemb" "u_char *table" "u_int endbyte"
+.Ft int
+.Fn sradixsort "u_char **base" "int nmemb" "u_char *table" "u_int endbyte"
 .Sh DESCRIPTION
 The
 .Fn radixsort
-function
-is a modified radix sort.
+and
+.Fn sradixsort
+functions
+are implementations of radix sort.
 .Pp
-The
-.Fn radixsort
-function sorts an array of
-.Fa nmemb
-pointers to byte strings, the initial member of which is referenced
-by
+These functions sort an array of pointers to byte strings, the initial
+member of which is referenced by
 .Fa base .
 The byte strings may contain any values; the end of each string
 is denoted by the user-specified value
 .Fa endbyte .
-The contents of the array are sorted in ascending order according
-to the
-.Tn ASCII
-order of the byte strings they reference.
 .Pp
 Applications may specify a sort order by providing the
 .Fa table
@ -74,38 +70,58 @@ must reference an array of
 .Dv UCHAR_MAX
 + 1 bytes which contains the sort
 weight of each possible byte value.
-The end-of-string byte must have a sort weight of 0.
+The end-of-string byte must have a sort weight of 0 or 255
+(for sorting in reverse order).
 More than one byte may have the same sort weight.
 The
 .Fa table
 argument
 is useful for applications which wish to sort different characters
-equally; for example, providing a table with the same weights
+equally, for example, providing a table with the same weights
 for A-Z as for a-z will result in a case-insensitive sort.
+If
+.Fa table
+is NULL, the contents of the array are sorted in ascending order
+according to the
+.Tn ASCII
+order of the byte strings they reference and
+.Fa endbyte
+has a sorting weight of 0.
+.Pp
+The
+.Fn sradixsort
+function is stable, that is, if two elements compare as equal, their
+order in the sorted array is unchanged.
+The
+.Fn sradixsort
+function uses additional memory sufficient to hold
+.Fa nmemb
+pointers.
 .Pp
 The
 .Fn radixsort
-function
-is stable, that is, if two elements compare as equal, their order in
-the sorted array is unchanged.
+function is not stable, but uses no additional memory.
 .Pp
-The
-.Fn radixsort
-function
-is a variant of most-significant-byte radix sorting; in particular, see
-D.E. Knuth's Algorithm R and section 5.2.5, exercise 10.
-The
-.Fn radixsort
-function
-takes linear time relative to the number of bytes in the strings.
+These functions are variants of most-significant-byte radix sorting; in
+particular, see D.E. Knuth's Algorithm R and section 5.2.5, exercise 10.
+They take linear time relative to the number of bytes in the strings.
 .Sh RETURN VALUES
 Upon successful completion 0 is returned.
 Otherwise, \-1 is returned and the global variable 
 .Va errno
 is set to indicate the error.
 .Sh ERRORS
-The
-.Fn radixsort
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The value of the
+.Fa endbyte
+element of
+.Fa table
+is not 0 or 255.
+.El
+.Pp
+Additionally, the
+.Fn sradixsort
 function
 may fail and set
 .Va errno
@ -131,14 +147,15 @@ for any of the errors specified for the library routine
 .%V Vol. 16
 .%N No. 6
 .Re
+.Rs
+.%A McIlroy, P.
+.%D 1993
+.%B "Engineering Radix Sort"
+.%T "Computing Systems"
+.%V Vol. 6:1
+.%P pp. 5-27
+.Re
 .Sh HISTORY
 The
 .Fn radixsort
-function is
-.Ud .
-.Sh BUGS
-The
-.Fa nmemb
-argument
-must be less than the maximum integer,
-.Dv INT_MAX .
+function first appeared in 4.4BSD.
--- a/lib/libc/stdlib/radixsort.c
+++ b/lib/libc/stdlib/radixsort.c
@ -1,6 +1,9 @@
 /*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy and by Dan Bernstein at New York University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
@ -32,259 +35,285 @@
 */

 #if defined(LIBC_SCCS) && !defined(lint)
-/*static char *sccsid = "from: @(#)radixsort.c	5.7 (Berkeley) 2/23/91";*/
-static char *rcsid = "$Id: radixsort.c,v 1.3 1993/08/26 00:48:07 jtc Exp $";
+/*static char sccsid[] = "from: @(#)radixsort.c	8.1 (Berkeley) 6/4/93";*/
+static char *rcsid = "$Id: radixsort.c,v 1.4 1994/06/16 05:26:44 mycroft Exp $";
 #endif /* LIBC_SCCS and not lint */

+/*
+ * Radixsort routines.
+ * 
+ * Program r_sort_a() is unstable but uses O(logN) extra memory for a stack.
+ * Use radixsort(a, n, tab, endch) for this case.
+ * 
+ * For stable sorting (using N extra pointers) use sradixsort(), which calls
+ * r_sort_b().
+ * 
+ * For a description of this code, see D. McIlroy, P. McIlroy, K. Bostic,
+ * "Engineering Radix Sort".
+ */
+
 #include <sys/types.h>
-#include <limits.h>
 #include <stdlib.h>
 #include <stddef.h>
-#include <string.h>
+#include <errno.h>

-/*
- * __rspartition is the cutoff point for a further partitioning instead
- * of a shellsort.  If it changes check __rsshell_increments.  Both of
- * these are exported, as the best values are data dependent.
- */
-#define	NPARTITION	40
-int __rspartition = NPARTITION;
-int __rsshell_increments[] = { 4, 1, 0, 0, 0, 0, 0, 0 };
+typedef struct {		/* One pending sub-sort, as saved by push(). */
+	const u_char **sa;	/* First element of the sub-array. */
+	int sn, si;		/* Element count; byte offset to sort on. */
+} stack;

-/*
- * Stackp points to context structures, where each structure schedules a
- * partitioning.  Radixsort exits when the stack is empty.
- *
- * If the buckets are placed on the stack randomly, the worst case is when
- * all the buckets but one contain (npartitions + 1) elements and the bucket
- * pushed on the stack last contains the rest of the elements.  In this case,
- * stack growth is bounded by:
- *
- *	limit = (nelements / (npartitions + 1)) - 1;
- *
- * This is a very large number, 52,377,648 for the maximum 32-bit signed int.
- *
- * By forcing the largest bucket to be pushed on the stack first, the worst
- * case is when all but two buckets each contain (npartitions + 1) elements,
- * with the remaining elements split equally between the first and last
- * buckets pushed on the stack.  In this case, stack growth is bounded when:
- *
- *	for (partition_cnt = 0; nelements > npartitions; ++partition_cnt)
- *		nelements =
- *		    (nelements - (npartitions + 1) * (nbuckets - 2)) / 2;
- * The bound is:
- *
- *	limit = partition_cnt * (nbuckets - 1);
- *
- * This is a much smaller number, 4590 for the maximum 32-bit signed int.
- */
-#define	NBUCKETS	(UCHAR_MAX + 1)
+static inline void simplesort	/* Insertion sort for short runs. */
+	    __P((const u_char **, int, int, const u_char *, u_int));
+static void r_sort_a __P((const u_char **, int, int, const u_char *, u_int));
+static void r_sort_b __P((const u_char **,	/* Takes a second, scratch array. */
+	    const u_char **, int, int, const u_char *, u_int));

-typedef struct _stack {
-	const u_char **bot;
-	int indx, nmemb;
-} CONTEXT;
+#define	THRESHOLD	20		/* Runs with fewer elements divert to simplesort(). */
+#define	SIZE		512		/* Default stack size: entries in r_sort_a()'s s[]; r_sort_b() spells 512 literally. */

-#define	STACKPUSH { \
-	stackp->bot = p; \
-	stackp->nmemb = nmemb; \
-	stackp->indx = indx; \
-	++stackp; \
-}
-#define	STACKPOP { \
-	if (stackp == stack) \
-		break; \
-	--stackp; \
-	bot = stackp->bot; \
-	nmemb = stackp->nmemb; \
-	indx = stackp->indx; \
+#define SETUP {	/* Choose weight table tr; turn endch into a weight. */	\
+	if (tab == NULL) {	/* No caller table: build tr0. */	\
+		tr = tr0;						\
+		for (c = 0; c < endch; c++)	/* NOTE(review): no endch <= 255 check before indexing tr0[]. */ \
+			tr0[c] = c + 1;	/* Bytes below endch shift up by one. */ \
+		tr0[c] = 0;		/* The end byte gets weight 0. */ \
+		for (c++; c < 256; c++)	/* Bytes above endch keep their value. */ \
+			tr0[c] = c;					\
+		endch = 0;		/* From here on endch is a weight. */ \
+	} else {							\
+		endch = tab[endch];	/* Weight the caller gave the end byte. */ \
+		tr = tab;						\
+		if (endch != 0 && endch != 255) {	/* Must be lowest or highest. */ \
+			errno = EINVAL;					\
+			return (-1);	/* Returns from the enclosing function. */ \
+		}							\
+	}								\
 }

-/*
- * A variant of MSD radix sorting; see Knuth Vol. 3, page 177, and 5.2.5,
- * Ex. 10 and 12.  Also, "Three Partition Refinement Algorithms, Paige
- * and Tarjan, SIAM J. Comput. Vol. 16, No. 6, December 1987.
- *
- * This uses a simple sort as soon as a bucket crosses a cutoff point,
- * rather than sorting the entire list after partitioning is finished.
- * This should be an advantage.
- *
- * This is pure MSD instead of LSD of some number of MSD, switching to
- * the simple sort as soon as possible.  Takes linear time relative to
- * the number of bytes in the strings.
- */
 int
-#if __STDC__
-radixsort(const u_char **l1, int nmemb, const u_char *tab, u_char endbyte)
-#else
-radixsort(l1, nmemb, tab, endbyte)
-	const u_char **l1;
-	register int nmemb;
-	const u_char *tab;
-	u_char endbyte;
-#endif
+radixsort(a, n, tab, endch)	/* Unstable variant: sorts a[] in place via r_sort_a(). */
+	const u_char **a, *tab;	/* String pointers; optional weight table (may be NULL). */
+	int n;			/* Number of entries in a[]. */
+	u_int endch;		/* Byte value that ends each string. */
 {
-	register int i, indx, t1, t2;
-	register const u_char **l2;
-	register const u_char **p;
-	register const u_char **bot;
-	register const u_char *tr;
-	CONTEXT *stack, *stackp;
-	int c[NBUCKETS + 1], max;
-	u_char ltab[NBUCKETS];
-	static void shellsort();
+	const u_char *tr;	/* Weight table in use; set by SETUP. */
+	int c;			/* Scratch index used by SETUP. */
+	u_char tr0[256];	/* Default table, filled by SETUP when tab is NULL. */

-	if (nmemb <= 1)
-		return(0);
+	SETUP;			/* May return -1 with errno = EINVAL. */
+	r_sort_a(a, n, 0, tr, endch);	/* Start at byte offset 0. */
+	return (0);
+}

-	/*
-	 * T1 is the constant part of the equation, the number of elements
-	 * represented on the stack between the top and bottom entries.
-	 * It doesn't get rounded as the divide by 2 rounds down (correct
-	 * for a value being subtracted).  T2, the nelem value, has to be
-	 * rounded up before each divide because we want an upper bound;
-	 * this could overflow if nmemb is the maximum int.
-	 */
-	t1 = ((__rspartition + 1) * (NBUCKETS - 2)) >> 1;
-	for (i = 0, t2 = nmemb; t2 > __rspartition; i += NBUCKETS - 1)
-		t2 = ((t2 + 1) >> 1) - t1;
-	if (i) {
-		if (!(stack = stackp = (CONTEXT *)malloc(i * sizeof(CONTEXT))))
-			return(-1);
-	} else
-		stack = stackp = NULL;
+int
+sradixsort(a, n, tab, endch)	/* Stable variant: uses r_sort_b() and a scratch array. */
+	const u_char **a, *tab;	/* String pointers; optional weight table (may be NULL). */
+	int n;			/* Number of entries in a[]. */
+	u_int endch;		/* Byte value that ends each string. */
+{
+	const u_char *tr, **ta;	/* Weight table in use; malloc'ed scratch array. */
+	int c;			/* Scratch index used by SETUP. */
+	u_char tr0[256];	/* Default table, filled by SETUP when tab is NULL. */

-	/*
-	 * There are two arrays, one provided by the user (l1), and the
-	 * temporary one (l2).  The data is sorted to the temporary stack,
-	 * and then copied back.  The speedup of using index to determine
-	 * which stack the data is on and simply swapping stacks back and
-	 * forth, thus avoiding the copy every iteration, turns out to not
-	 * be any faster than the current implementation.
-	 */
-	if (!(l2 = (const u_char **)malloc(sizeof(u_char *) * nmemb)))
-		return(-1);
-
-	/*
-	 * Tr references a table of sort weights; multiple entries may
-	 * map to the same weight; EOS char must have the lowest weight.
-	 */
-	if (tab)
-		tr = tab;
+	SETUP;			/* May return -1 with errno = EINVAL. */
+	if (n < THRESHOLD)	/* Short input: insertion sort, no allocation. */
+		simplesort(a, n, 0, tr, endch);
 	else {
-		for (t1 = 0, t2 = endbyte; t1 < t2; ++t1)
-			ltab[t1] = t1 + 1;
-		ltab[t2] = 0;
-		for (t1 = endbyte + 1; t1 < NBUCKETS; ++t1)
-			ltab[t1] = t1;
-		tr = ltab;
+		if ((ta = malloc(n * sizeof(a))) == NULL)	/* NOTE(review): sizeof(a) is a pointer-to-pointer size; the element size is sizeof(*ta). */
+			return (-1);	/* errno is whatever malloc() left. */
+		r_sort_b(a, ta, n, 0, tr, endch);	/* Start at byte offset 0. */
+		free(ta);
 	}
-
-	/* First sort is entire stack */
-	bot = l1;
-	indx = 0;
-
-	for (;;) {
-		/* Clear bucket count array */
-		bzero((char *)c, sizeof(c));
-
-		/*
-		 * Compute number of items that sort to the same bucket
-		 * for this index.
-		 */
-		for (p = bot, i = nmemb; --i >= 0;)
-			++c[tr[(*p++)[indx]]];
-
-		/*
-		 * Sum the number of characters into c, dividing the temp
-		 * stack into the right number of buckets for this bucket,
-		 * this index.  C contains the cumulative total of keys
-		 * before and included in this bucket, and will later be
-		 * used as an index to the bucket.  c[NBUCKETS] contains
-		 * the total number of elements, for determining how many
-		 * elements the last bucket contains.  At the same time
-		 * find the largest bucket so it gets pushed first.
-		 */
-		for (i = max = t1 = 0, t2 = __rspartition; i <= NBUCKETS; ++i) {
-			if (c[i] > t2) {
-				t2 = c[i];
-				max = i;
-			}
-			t1 = c[i] += t1;
-		}
-
-		/*
-		 * Partition the elements into buckets; c decrements through
-		 * the bucket, and ends up pointing to the first element of
-		 * the bucket.
-		 */
-		for (i = nmemb; --i >= 0;) {
-			--p;
-			l2[--c[tr[(*p)[indx]]]] = *p;
-		}
-
-		/* Copy the partitioned elements back to user stack */
-		bcopy(l2, bot, nmemb * sizeof(u_char *));
-
-		++indx;
-		/*
-		 * Sort buckets as necessary; don't sort c[0], it's the
-		 * EOS character bucket, and nothing can follow EOS.
-		 */
-		for (i = max; i; --i) {
-			if ((nmemb = c[i + 1] - (t1 = c[i])) < 2)
-				continue;
-			p = bot + t1;
-			if (nmemb > __rspartition)
-				STACKPUSH
-			else
-				shellsort(p, indx, nmemb, tr);
-		}
-		for (i = max + 1; i < NBUCKETS; ++i) {
-			if ((nmemb = c[i + 1] - (t1 = c[i])) < 2)
-				continue;
-			p = bot + t1;
-			if (nmemb > __rspartition)
-				STACKPUSH
-			else
-				shellsort(p, indx, nmemb, tr);
-		}
-		/* Break out when stack is empty */
-		STACKPOP
-	}
-
-	free((char *)l2);
-	free((char *)stack);
-	return(0);
+	return (0);
 }

-/*
- * Shellsort (diminishing increment sort) from Data Structures and
- * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;
- * see also Knuth Vol. 3, page 84.  The increments are selected from
- * formula (8), page 95.  Roughly O(N^3/2).
- */
-static void
-shellsort(p, indx, nmemb, tr)
-	register u_char **p, *tr;
-	register int indx, nmemb;
+#define empty(s)	(s >= sp)	/* True when sp is back at the stack base s. */
+#define pop(a, n, i)	a = (--sp)->sa, n = sp->sn, i = sp->si	/* Unstack one job; needs a local sp. */
+#define push(a, n, i)	sp->sa = a, sp->sn = n, (sp++)->si = i	/* Stack one job; needs a local sp. */
+#define swap(a, b, t)	t = a, a = b, b = t	/* Exchange a and b through temporary t. */
+
+/* Unstable, in-place sort. */
+void
+r_sort_a(a, n, i, tr, endch)
+	const u_char **a;	/* Array of string pointers to sort. */
+	int n, i;		/* Element count; byte offset to sort on. */
+	const u_char *tr;	/* Byte-to-weight table. */
+	u_int endch;		/* Weight that marks end of string. */
 {
-	register u_char ch, *s1, *s2;
-	register int incr, *incrp, t1, t2;
+	static int count[256], nc, bmin;	/* NOTE(review): static, so shared by every call; the recursive call below appears to rely on that. */
+	register int c;
+	register const u_char **ak, *r;
+	stack s[SIZE], *sp, *sp0, *sp1, temp;	/* Explicit work stack and cursors into it. */
+	int *cp, bigc;
+	const u_char **an, *t, **aj, **top[256];

-	for (incrp = __rsshell_increments; incr = *incrp++;)
-		for (t1 = incr; t1 < nmemb; ++t1)
-			for (t2 = t1 - incr; t2 >= 0;) {
-				s1 = p[t2] + indx;
-				s2 = p[t2 + incr] + indx;
-				while ((ch = tr[*s1++]) == tr[*s2] && ch)
-					++s2;
-				if (ch > tr[*s2]) {
-					s1 = p[t2];
-					p[t2] = p[t2 + incr];
-					p[t2 + incr] = s1;
-					t2 -= incr;
-				} else
-					break;
+	/* Set up stack. */
+	sp = s;
+	push(a, n, i);
+	while (!empty(s)) {
+		pop(a, n, i);
+		if (n < THRESHOLD) {	/* Short run: finish it with insertion sort. */
+			simplesort(a, n, i, tr, endch);
+			continue;
+		}
+		an = a + n;	/* One past the last element of this run. */
+
+		/* Make character histogram. */
+		if (nc == 0) {	/* Presumably nonzero only when re-entered via the recursive call below. */
+			bmin = 255;	/* First occupied bin, excluding eos. */
+			for (ak = a; ak < an;) {
+				c = tr[(*ak++)[i]];	/* Weight of byte i of this string. */
+				if (++count[c] == 1 && c != endch) {	/* First hit on a non-eos bin. */
+					if (c < bmin)
+						bmin = c;
+					nc++;	/* One more occupied non-eos bin. */
+				}
 			}
+			if (sp + nc > s + SIZE) {	/* Get more stack. */
+				r_sort_a(a, n, i, tr, endch);	/* The new frame has its own s[]; count[]/nc/bmin carry over. */
+				continue;
+			}
+		}
+
+		/*
+		 * Set top[]; push incompletely sorted bins onto stack.
+		 * top[] = pointers to last out-of-place element in bins.
+		 * count[] = counts of elements in bins.
+		 * Before permuting: top[c-1] + count[c] = top[c];
+		 * during deal: top[c] counts down to top[c-1].
+		 */
+		sp0 = sp1 = sp;		/* Stack position of biggest bin. */
+		bigc = 2;		/* Size of biggest bin. */
+		if (endch == 0)		/* Special case: set top[eos]. */
+			top[0] = ak = a + count[0];
+		else {
+			ak = a;
+			top[255] = an;
+		}
+		for (cp = count + bmin; nc > 0; cp++) {	/* Visit each occupied non-eos bin once. */
+			while (*cp == 0)	/* Find next non-empty pile. */
+				cp++;
+			if (*cp > 1) {	/* Single-element bins are not pushed. */
+				if (*cp > bigc) {
+					bigc = *cp;
+					sp1 = sp;	/* Slot the biggest bin is about to occupy. */
+				}
+				push(ak, *cp, i+1);	/* Sort this bin later on the next byte. */
+			}
+			top[cp-count] = ak += *cp;	/* ak now points just past this bin. */
+			nc--;
+		}
+		swap(*sp0, *sp1, temp);	/* Play it safe -- biggest bin last. */
+
+		/*
+		 * Permute misplacements home.  Already home: everything
+		 * before aj, and in bin[c], items from top[c] on.
+		 * Inner loop:
+		 *	r = next element to put in place;
+		 *	ak = top[r[i]] = location to put the next element.
+		 *	aj = bottom of 1st disordered bin.
+		 * Outer loop:
+		 *	Once the 1st disordered bin is done, ie. aj >= ak,
+		 *	aj<-aj + count[c] connects the bins in a linked list;
+		 *	reset count[c].
+		 */
+		for (aj = a; aj < an;  *aj = r, aj += count[c], count[c] = 0)
+			for (r = *aj;  aj < (ak = --top[c = tr[r[i]]]);)
+				swap(*ak, r, t);	/* Drop r into its bin; pick up the element it displaced. */
+	}
+}
+
+/* Stable sort, requiring additional memory. */
+void
+r_sort_b(a, ta, n, i, tr, endch)
+	const u_char **a, **ta;	/* Array to sort; scratch array the run is copied into. */
+	int n, i;		/* Element count; byte offset to sort on. */
+	const u_char *tr;	/* Byte-to-weight table. */
+	u_int endch;		/* Weight that marks end of string. */
+{
+	static int count[256], nc, bmin;	/* NOTE(review): static, so shared by every call; the recursive call below appears to rely on that. */
+	register int c;
+	register const u_char **ak, **ai;
+	stack s[512], *sp, *sp0, *sp1, temp;	/* NOTE(review): literal 512 must stay equal to SIZE used in the overflow check below. */
+	const u_char **top[256];
+	int *cp, bigc;
+
+	sp = s;
+	push(a, n, i);
+	while (!empty(s)) {
+		pop(a, n, i);
+		if (n < THRESHOLD) {	/* Short run: finish it with insertion sort. */
+			simplesort(a, n, i, tr, endch);
+			continue;
+		}
+
+		if (nc == 0) {	/* Presumably nonzero only when re-entered via the recursive call below. */
+			bmin = 255;	/* Lowest occupied non-eos bin seen so far. */
+			for (ak = a + n; --ak >= a;) {	/* NOTE(review): the loop exits with ak == a - 1, a pointer before the array. */
+				c = tr[(*ak)[i]];	/* Weight of byte i of this string. */
+				if (++count[c] == 1 && c != endch) {	/* First hit on a non-eos bin. */
+					if (c < bmin)
+						bmin = c;
+					nc++;	/* One more occupied non-eos bin. */
+				}
+			}
+			if (sp + nc > s + SIZE) {	/* Pushing nc bins could overflow s[]. */
+				r_sort_b(a, ta, n, i, tr, endch);	/* The new frame has its own s[]; count[]/nc/bmin carry over. */
+				continue;
+			}
+		}
+
+		sp0 = sp1 = sp;	/* Where the first bin pushed below will land. */
+		bigc = 2;	/* Largest bin size seen so far. */
+		if (endch == 0) {	/* eos bin sorts first. */
+			top[0] = ak = a + count[0];
+			count[0] = 0;
+		} else {	/* eos bin (weight 255) sorts last. */
+			ak = a;
+			top[255] = a + n;
+			count[255] = 0;
+		}
+		for (cp = count + bmin; nc > 0; cp++) {	/* Visit each occupied non-eos bin once. */
+			while (*cp == 0)	/* Skip empty bins. */
+				cp++;
+			if ((c = *cp) > 1) {	/* Single-element bins are not pushed. */
+				if (c > bigc) {
+					bigc = c;
+					sp1 = sp;	/* Slot the biggest bin is about to occupy. */
+				}
+				push(ak, c, i+1);	/* Sort this bin later on the next byte. */
+			}
+			top[cp-count] = ak += c;	/* ak now points just past this bin. */
+			*cp = 0;			/* Reset count[]. */
+			nc--;
+		}
+		swap(*sp0, *sp1, temp);	/* Exchange the first-pushed entry with the biggest bin's entry. */
+
+		for (ak = ta + n, ai = a+n; ak > ta;)	/* Copy to temp. */
+			*--ak = *--ai;
+		for (ak = ta+n; --ak >= ta;)		/* Deal to piles. */
+			*--top[tr[(*ak)[i]]] = *ak;	/* Back-to-front scan and fill keeps equal keys in input order. */
+	}
+}
+		
+static inline void
+simplesort(a, n, b, tr, endch)	/* insertion sort */
+	register const u_char **a;	/* Array of string pointers to sort. */
+	int n, b;			/* Element count; byte offset where comparison starts. */
+	register const u_char *tr;	/* Byte-to-weight table. */
+	u_int endch;			/* Weight that marks end of string. */
+{
+	register u_char ch;
+	const u_char  **ak, **ai, *s, *t;
+
+	for (ak = a+1; --n >= 1; ak++)	/* Insert a[1] .. a[n-1] one at a time. */
+		for (ai = ak; ai > a; ai--) {	/* Move the new element left while it is out of order. */
+			for (s = ai[0] + b, t = ai[-1] + b;
+			    (ch = tr[*s]) != endch; s++, t++)	/* Walk the common prefix. */
+				if (ch != tr[*t])
+					break;
+			if (ch >= tr[*t])	/* Not smaller than its left neighbour: done (ties stay put). */
+				break;
+			swap(ai[0], ai[-1], s);	/* s is free here and doubles as the temporary. */
+		}
 }