Add RCS ids.

1994-06-16 05:26:34 +00:00 · 1994-06-16 05:26:34 +00:00 · 2f86deea8e
parent 2c1d50080b
commit 2f86deea8e
6 changed files with 617 additions and 547 deletions
--- a/lib/libc/stdlib/heapsort.c
+++ b/lib/libc/stdlib/heapsort.c
@ -1,6 +1,9 @@
 /*-
- * Copyright (c) 1991 The Regents of the University of California.
+ * Copyright (c) 1991, 1993
- * All rights reserved.
+ *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Ronnie Kon at Mindcraft Inc., Kevin Lew and Elmer Yglesias.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
@ -32,14 +35,14 @@
 */
 #if defined(LIBC_SCCS) && !defined(lint)
-/*static char *sccsid = "from: @(#)heapsort.c	5.1 (Berkeley) 6/4/91";*/
+/*static char sccsid[] = "from: @(#)heapsort.c	8.1 (Berkeley) 6/4/93";*/
-static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
+static char *rcsid = "$Id: heapsort.c,v 1.4 1994/06/16 05:26:34 mycroft Exp $";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/cdefs.h>
 #include <sys/types.h>
 #include <errno.h>
 #include <stdlib.h>
 #include <stddef.h>
 /*
 * Swap two areas of size number of bytes.  Although qsort(3) permits random
@ -48,13 +51,23 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
 * isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
 * arithmetic gets lost in the time required for comparison function calls.
 */
-#define	SWAP(a, b) { \
+#define	SWAP(a, b, count, size, tmp) { \
-	cnt = size; \
+	count = size; \
 	do { \
-		ch = *a; \
+		tmp = *a; \
 		*a++ = *b; \
-		*b++ = ch; \
+		*b++ = tmp; \
-	} while (--cnt); \
+	} while (--count); \
 }
 /* Copy one block of size size to another. */
 #define COPY(a, b, count, size, tmp1, tmp2) { \
 	count = size; \
 	tmp1 = a; \
 	tmp2 = b; \
 	do { \
 		*tmp1++ = *tmp2++; \
 	} while (--count); \
 }
 /*
@ -63,21 +76,59 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
 *
 * There are two cases.  If j == nmemb, select largest of Ki and Kj.  If
 * j < nmemb, select largest of Ki, Kj and Kj+1.
 *
 * The initial value depends on if we're building the initial heap or
 * reconstructing it after saving a value.
 */
-#define	HEAP(initval) { \
+#define CREATE(initval, nmemb, par_i, child_i, par, child, size, count, tmp) { \
-	for (i = initval; (j = i * 2) <= nmemb; i = j) { \
+	for (par_i = initval; (child_i = par_i * 2) <= nmemb; \
-		p = (char *)bot + j * size; \
+	    par_i = child_i) { \
-		if (j < nmemb && compar(p, p + size) < 0) { \
+		child = base + child_i * size; \
-			p += size; \
+		if (child_i < nmemb && compar(child, child + size) < 0) { \
-			++j; \
+			child += size; \
 			++child_i; \
 		} \
-		t = (char *)bot + i * size; \
+		par = base + par_i * size; \
-		if (compar(p, t) <= 0) \
+		if (compar(child, par) <= 0) \
 			break; \
-		SWAP(t, p); \
+		SWAP(par, child, count, size, tmp); \
 	} \
 }
 /*
 * Select the top of the heap and 'heapify'.  Since by far the most expensive
 * action is the call to the compar function, a considerable optimization
 * in the average case can be achieved due to the fact that k, the displaced
 * element, is usually quite small, so it would be preferable to first
 * heapify, always maintaining the invariant that the larger child is copied
 * over its parent's record.
 *
 * Then, starting from the *bottom* of the heap, finding k's correct place,
 * again maintaining the invariant.  As a result of the invariant no element
 * is 'lost' when k is assigned its correct place in the heap.
 *
 * The time savings from this optimization are on the order of 15-20% for the
 * average case. See Knuth, Vol. 3, page 158, problem 18.
 *
 * XXX Don't break the #define SELECT line, below.  Reiser cpp gets upset.
 */
 #define SELECT(par_i, child_i, nmemb, par, child, size, k, count, tmp1, tmp2) { \
 	for (par_i = 1; (child_i = par_i * 2) <= nmemb; par_i = child_i) { \
 		child = base + child_i * size; \
 		if (child_i < nmemb && compar(child, child + size) < 0) { \
 			child += size; \
 			++child_i; \
 		} \
 		par = base + par_i * size; \
 		COPY(par, child, count, size, tmp1, tmp2); \
 	} \
 	for (;;) { \
 		child_i = par_i; \
 		par_i = child_i / 2; \
 		child = base + child_i * size; \
 		par = base + par_i * size; \
 		if (child_i == 1 || compar(k, par) < 0) { \
 			COPY(child, k, count, size, tmp1, tmp2); \
 			break; \
 		} \
 		COPY(child, par, count, size, tmp1, tmp2); \
 	} \
 }
@ -86,41 +137,49 @@ static char *rcsid = "$Id: heapsort.c,v 1.3 1993/08/26 00:48:00 jtc Exp $";
 * and worst.  While heapsort is faster than the worst case of quicksort,
 * the BSD quicksort does median selection so that the chance of finding
 * a data set that will trigger the worst case is nonexistent.  Heapsort's
- * only advantage over quicksort is that it requires no additional memory.
+ * only advantage over quicksort is that it requires little additional memory.
 */
-heapsort(bot, nmemb, size, compar)
+int
-	register void *bot;
+heapsort(vbase, nmemb, size, compar)
-	register size_t nmemb, size;
+	void *vbase;
 	size_t nmemb, size;
 	int (*compar) __P((const void *, const void *));
 {
 	register char *p, *t, ch;
 	register int cnt, i, j, l;
 	register char tmp, *tmp1, *tmp2;
 	char *base, *k, *p, *t;
 	if (nmemb <= 1)
 		return (0);
 	if (!size) {
 		errno = EINVAL;
 		return (-1);
 	}
 	if ((k = malloc(size)) == NULL)
 		return (-1);
 	/*
 	 * Items are numbered from 1 to nmemb, so offset from size bytes
 	 * below the starting address.
 	 */
-	bot -= size;
+	base = (char *)vbase - size;
 	for (l = nmemb / 2 + 1; --l;)
-		HEAP(l);
+		CREATE(l, nmemb, i, j, t, p, size, cnt, tmp);
 	/*
 	 * For each element of the heap, save the largest element into its
-	 * final slot, then recreate the heap.
+	 * final slot, save the displaced element (k), then recreate the
 	 * heap.
 	 */
 	while (nmemb > 1) {
-		p = (char *)bot + size;
+		COPY(k, base + nmemb * size, cnt, size, tmp1, tmp2);
-		t = (char *)bot + nmemb * size;
+		COPY(base + nmemb * size, base + size, cnt, size, tmp1, tmp2);
 		SWAP(p, t);
 		--nmemb;
-		HEAP(1);
+		SELECT(i, j, nmemb, t, p, size, k, cnt, tmp1, tmp2);
 	}
 	free(k);
 	return (0);
 }
--- a/lib/libc/stdlib/merge.c
+++ b/lib/libc/stdlib/merge.c
@ -35,7 +35,8 @@
 */
 #if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)merge.c	8.2 (Berkeley) 2/14/94";
+/*static char sccsid[] = "from: @(#)merge.c	8.2 (Berkeley) 2/14/94";*/
 static char *rcsid = "$Id: merge.c,v 1.2 1994/06/16 05:26:36 mycroft Exp $";
 #endif /* LIBC_SCCS and not lint */
 /*
--- a/lib/libc/stdlib/qsort.3
+++ b/lib/libc/stdlib/qsort.3
@ -1,5 +1,5 @@
-.\" Copyright (c) 1990, 1991 The Regents of the University of California.
+.\" Copyright (c) 1990, 1991, 1993
-.\" All rights reserved.
+.\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" This code is derived from software contributed to Berkeley by
 .\" the American National Standards Committee X3, on Information
@ -33,14 +33,14 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.\"     from: @(#)qsort.3	6.7 (Berkeley) 6/29/91
+.\"     from: @(#)qsort.3	8.1 (Berkeley) 6/4/93
-.\"	$Id: qsort.3,v 1.2 1993/08/01 07:44:22 mycroft Exp $
+.\"	$Id: qsort.3,v 1.3 1994/06/16 05:26:38 mycroft Exp $
 .\"
-.Dd June 29, 1991
+.Dd June 4, 1993
 .Dt QSORT 3
 .Os
 .Sh NAME
-.Nm qsort, heapsort
+.Nm qsort, heapsort, mergesort
 .Nd sort functions
 .Sh SYNOPSIS
 .Fd #include <stdlib.h>
@ -48,6 +48,8 @@
 .Fn qsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
 .Ft int
 .Fn heapsort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
 .Ft int
 .Fn mergesort "void *base" "size_t nmemb" "size_t size" "int (*compar)(const void *, const void *)"
 .Sh DESCRIPTION
 The
 .Fn qsort
@ -55,6 +57,10 @@ function is a modified partition-exchange sort, or quicksort.
 The
 .Fn heapsort
 function is a modified selection sort.
 The
 .Fn mergesort
 function is a modified merge sort with exponential search
 intended for sorting data with pre-existing order.
 .Pp
 The
 .Fn qsort
@ -66,11 +72,20 @@ objects, the initial member of which is pointed to by
 .Fa base .
 The size of each object is specified by
 .Fa size .
 .Fn Mergesort
 behaves similarly, but
 .Em requires
 that
 .Fa size
 be greater than
 .Dq "sizeof(void *) / 2" .
 .Pp
-The contents of the array are sorted in ascending order according to
+The contents of the array
 .Fa base
 are sorted in ascending order according to
 a comparison function pointed to by
 .Fa compar ,
-which is called with two arguments that point to the objects being
+which requires two arguments pointing to the objects being
 compared.
 .Pp
 The comparison function must return an integer less than, equal to, or
@ -85,6 +100,9 @@ are
 .Em not
 stable, that is, if two members compare as equal, their order in
 the sorted array is undefined.
 The function
 .Fn mergesort
 is stable.
 .Pp
 The
 .Fn qsort
@ -93,7 +111,7 @@ a variant of partition-exchange sorting; in particular, see D.E. Knuth's
 Algorithm Q.
 .Fn Qsort
 takes O N lg N average time.
-This implementation uses median selection to avoid the traditional
+This implementation uses median selection to avoid its
 O N**2 worst-case behavior.
 .Pp
 The
@ -106,7 +124,28 @@ Its
 .Em only
 advantage over
 .Fn qsort
-is that it uses no additional memory.
+is that it uses almost no additional memory; while
 .Fn qsort
 does not allocate memory, it is implemented using recursion.
 .Pp
 The function
 .Fn mergesort
 requires additional memory of size
 .Fa nmemb *
 .Fa size 
 bytes; it should be used only when space is not at a premium.
 .Fn Mergesort
 is optimized for data with pre-existing order; its worst case
 time is O N lg N; its best case is O N.
 .Pp
 Normally,
 .Fn qsort
 is faster than
 .Fn mergesort
 is faster than
 .Fn heapsort .
 Memory availability and pre-existing order in the data can make this
 untrue.
 .Sh RETURN VALUES
 The
 .Fn qsort
@ -115,8 +154,10 @@ returns no value.
 .Pp
 Upon successful completion,
 .Fn heapsort
-returns 0.
+and
-Otherwise, it returns \-1 and the global variable
+.Fn mergesort
 return 0.
 Otherwise, they return \-1 and the global variable
 .Va errno
 is set to indicate the error.
 .Sh ERRORS
@ -127,11 +168,23 @@ function succeeds unless:
 .It Bq Er EINVAL
 The
 .Fa size
-argument is zero.
+argument is zero, or,
 the
 .Fa size
 argument to
 .Fn mergesort
 is less than
 .Dq "sizeof(void *) / 2" .
 .It Bq Er ENOMEM
 .Fn Heapsort
 or
 .Fn mergesort
 was unable to allocate memory.
 .El
 .Sh COMPATIBILITY
 Previous versions of
 .Fn qsort
-did not permit the comparison routine to itself call
+did not permit the comparison routine itself to call
 .Fn qsort 3 .
 This is no longer true.
 .Sh SEE ALSO
@ -161,6 +214,18 @@ This is no longer true.
 .%T "Sorting and Searching"
 .%P pp. 114-123, 145-149
 .Re
 .Rs
 .%A McIlroy, P.M.
 .%T "Optimistic Sorting and Information Theoretic Complexity"
 .%J "Fourth Annual ACM-SIAM Symposium on Discrete Algorithms"
 .%V January 1992
 .Re
 .Rs
 .%A Bentley, J.L.
 .%T "Engineering a Sort Function"
 .%J "bentley@research.att.com"
 .%V January 1992
 .Re
 .Sh STANDARDS
 The
 .Fn qsort
--- a/lib/libc/stdlib/qsort.c
+++ b/lib/libc/stdlib/qsort.c
@ -1,6 +1,6 @@
 /*-
- * Copyright (c) 1980, 1983, 1990 The Regents of the University of California.
+ * Copyright (c) 1992, 1993
- * All rights reserved.
+ *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
@ -32,245 +32,144 @@
 */
 #if defined(LIBC_SCCS) && !defined(lint)
-/*static char *sccsid = "from: @(#)qsort.c	5.9 (Berkeley) 2/23/91";*/
+/*static char sccsid[] = "from: @(#)qsort.c	8.1 (Berkeley) 6/4/93";*/
-static char *rcsid = "$Id: qsort.c,v 1.3 1993/08/26 00:48:06 jtc Exp $";
+static char *rcsid = "$Id: qsort.c,v 1.4 1994/06/16 05:26:39 mycroft Exp $";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/types.h>
 #include <stdlib.h>
-/*
+static inline char	*med3 __P((char *, char *, char *, int (*)()));
- * MTHRESH is the smallest partition for which we compare for a median
+static inline void	 swapfunc __P((char *, char *, int, int));
- * value instead of using the middle value.
+
- */
+#define min(a, b)	(a) < (b) ? a : b
 #define	MTHRESH	6
 /*
- * THRESH is the minimum number of entries in a partition for continued
+ * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
 * partitioning.
 */
-#define	THRESH	4
+#define swapcode(TYPE, parmi, parmj, n) { 		\
 	long i = (n) / sizeof (TYPE); 			\
 	register TYPE *pi = (TYPE *) (parmi); 		\
 	register TYPE *pj = (TYPE *) (parmj); 		\
 	do { 						\
 		register TYPE	t = *pi;		\
 		*pi++ = *pj;				\
 		*pj++ = t;				\
        } while (--i > 0);				\
 }
 #define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
 	es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
 static inline void
 swapfunc(a, b, n, swaptype)
 	char *a, *b;
 	int n, swaptype;
 {
 	if(swaptype <= 1) 
 		swapcode(long, a, b, n)
 	else
 		swapcode(char, a, b, n)
 }
 #define swap(a, b)					\
 	if (swaptype == 0) {				\
 		long t = *(long *)(a);			\
 		*(long *)(a) = *(long *)(b);		\
 		*(long *)(b) = t;			\
 	} else						\
 		swapfunc(a, b, es, swaptype)
 #define vecswap(a, b, n) 	if ((n) > 0) swapfunc(a, b, n, swaptype)
 static inline char *
 med3(a, b, c, cmp)
 	char *a, *b, *c;
 	int (*cmp)();
 {
 	return cmp(a, b) < 0 ?
 	       (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a ))
              :(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c ));
 }
 void
-qsort(bot, nmemb, size, compar)
+qsort(a, n, es, cmp)
-	void *bot;
+	void *a;
-	size_t nmemb, size;
+	size_t n, es;
-	int (*compar) __P((const void *, const void *));
+	int (*cmp)();
 {
-	static void insertion_sort(), quick_sort();
+	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
 	int d, r, swaptype, swap_cnt;
-	if (nmemb <= 1)
+loop:	SWAPINIT(a, es);
 	swap_cnt = 0;
 	if (n < 7) {
 		for (pm = a + es; pm < (char *) a + n * es; pm += es)
 			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
 			     pl -= es)
 				swap(pl, pl - es);
 		return;
 	if (nmemb >= THRESH)
 		quick_sort(bot, nmemb, size, compar);
 	else
 		insertion_sort(bot, nmemb, size, compar);
 }
 /*
 * Swap two areas of size number of bytes.  Although qsort(3) permits random
 * blocks of memory to be sorted, sorting pointers is almost certainly the
 * common case (and, were it not, could easily be made so).  Regardless, it
 * isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
 * arithmetic gets lost in the time required for comparison function calls.
 */
 #define	SWAP(a, b) { \
 	cnt = size; \
 	do { \
 		ch = *a; \
 		*a++ = *b; \
 		*b++ = ch; \
 	} while (--cnt); \
 }
 /*
 * Knuth, Vol. 3, page 116, Algorithm Q, step b, argues that a single pass
 * of straight insertion sort after partitioning is complete is better than
 * sorting each small partition as it is created.  This isn't correct in this
 * implementation because comparisons require at least one (and often two)
 * function calls and are likely to be the dominating expense of the sort.
 * Doing a final insertion sort does more comparisons than are necessary
 * because it compares the "edges" and medians of the partitions which are
 * known to be already sorted.
 *
 * This is also the reasoning behind selecting a small THRESH value (see
 * Knuth, page 122, equation 26), since the quicksort algorithm does less
 * comparisons than the insertion sort.
 */
 #define	SORT(bot, n) { \
 	if (n > 1) \
 		if (n == 2) { \
 			t1 = bot + size; \
 			if (compar(t1, bot) < 0) \
 				SWAP(t1, bot); \
 		} else \
 			insertion_sort(bot, n, size, compar); \
 }
 static void
 quick_sort(bot, nmemb, size, compar)
 	register char *bot;
 	register int size;
 	int nmemb, (*compar)();
 {
 	register int cnt;
 	register u_char ch;
 	register char *top, *mid, *t1, *t2;
 	register int n1, n2;
 	char *bsv;
 	static void insertion_sort();
 	/* bot and nmemb must already be set. */
 partition:
 	/* find mid and top elements */
 	mid = bot + size * (nmemb >> 1);
 	top = bot + (nmemb - 1) * size;
 	/*
 	 * Find the median of the first, last and middle element (see Knuth,
 	 * Vol. 3, page 123, Eq. 28).  This test order gets the equalities
 	 * right.
 	 */
 	if (nmemb >= MTHRESH) {
 		n1 = compar(bot, mid);
 		n2 = compar(mid, top);
 		if (n1 < 0 && n2 > 0)
 			t1 = compar(bot, top) < 0 ? top : bot;
 		else if (n1 > 0 && n2 < 0)
 			t1 = compar(bot, top) > 0 ? top : bot;
 		else
 			t1 = mid;
 		/* if mid element not selected, swap selection there */
 		if (t1 != mid) {
 			SWAP(t1, mid);
 			mid -= size;
 		}
 	}
-
+	pm = a + (n / 2) * es;
-	/* Standard quicksort, Knuth, Vol. 3, page 116, Algorithm Q. */
+	if (n > 7) {
-#define	didswap	n1
+		pl = a;
-#define	newbot	t1
+		pn = a + (n - 1) * es;
-#define	replace	t2
+		if (n > 40) {
-	didswap = 0;
+			d = (n / 8) * es;
-	for (bsv = bot;;) {
+			pl = med3(pl, pl + d, pl + 2 * d, cmp);
-		for (; bot < mid && compar(bot, mid) <= 0; bot += size);
+			pm = med3(pm - d, pm, pm + d, cmp);
-		while (top > mid) {
+			pn = med3(pn - 2 * d, pn - d, pn, cmp);
 			if (compar(mid, top) <= 0) {
 				top -= size;
 				continue;
 			}
 			newbot = bot + size;	/* value of bot after swap */
 			if (bot == mid)		/* top <-> mid, mid == top */
 				replace = mid = top;
 			else {			/* bot <-> top */
 				replace = top;
 				top -= size;
 			}
 			goto swap;
 		}
-		if (bot == mid)
+		pm = med3(pl, pm, pn, cmp);
 	}
 	swap(a, pm);
 	pa = pb = a + es;
 	pc = pd = a + (n - 1) * es;
 	for (;;) {
 		while (pb <= pc && (r = cmp(pb, a)) <= 0) {
 			if (r == 0) {
 				swap_cnt = 1;
 				swap(pa, pb);
 				pa += es;
 			}
 			pb += es;
 		}
 		while (pb <= pc && (r = cmp(pc, a)) >= 0) {
 			if (r == 0) {
 				swap_cnt = 1;
 				swap(pc, pd);
 				pd -= es;
 			}
 			pc -= es;
 		}
 		if (pb > pc)
 			break;
-
+		swap(pb, pc);
-		/* bot <-> mid, mid == bot */
+		swap_cnt = 1;
-		replace = mid;
+		pb += es;
-		newbot = mid = bot;		/* value of bot after swap */
+		pc -= es;
 		top -= size;
 swap:		SWAP(bot, replace);
 		bot = newbot;
 		didswap = 1;
 	}
-
+	if (swap_cnt == 0) {  /* Switch to insertion sort */
-	/*
+		for (pm = a + es; pm < (char *) a + n * es; pm += es)
-	 * Quicksort behaves badly in the presence of data which is already
+			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0; 
-	 * sorted (see Knuth, Vol. 3, page 119) going from O N lg N to O N^2.
+			     pl -= es)
-	 * To avoid this worst case behavior, if a re-partitioning occurs
+				swap(pl, pl - es);
 	 * without swapping any elements, it is not further partitioned and
 	 * is insert sorted.  This wins big with almost sorted data sets and
 	 * only loses if the data set is very strangely partitioned.  A fix
 	 * for those data sets would be to return prematurely if the insertion
 	 * sort routine is forced to make an excessive number of swaps, and
 	 * continue the partitioning.
 	 */
 	if (!didswap) {
 		insertion_sort(bsv, nmemb, size, compar);
 		return;
 	}
-	/*
+	pn = a + n * es;
-	 * Re-partition or sort as necessary.  Note that the mid element
+	r = min(pa - (char *)a, pb - pa);
-	 * itself is correctly positioned and can be ignored.
+	vecswap(a, pb - r, r);
-	 */
+	r = min(pd - pc, pn - pd - es);
-#define	nlower	n1
+	vecswap(pb, pn - r, r);
-#define	nupper	n2
+	if ((r = pb - pa) > es)
-	bot = bsv;
+		qsort(a, r / es, es, cmp);
-	nlower = (mid - bot) / size;	/* size of lower partition */
+	if ((r = pd - pc) > es) { 
-	mid += size;
+		/* Iterate rather than recurse to save stack space */
-	nupper = nmemb - nlower - 1;	/* size of upper partition */
+		a = pn - r;
-
+		n = r / es;
-	/*
+		goto loop;
 	 * If must call recursively, do it on the smaller partition; this
 	 * bounds the stack to lg N entries.
 	 */
 	if (nlower > nupper) {
 		if (nupper >= THRESH)
 			quick_sort(mid, nupper, size, compar);
 		else {
 			SORT(mid, nupper);
 			if (nlower < THRESH) {
 				SORT(bot, nlower);
 				return;
 			}
 		}
 		nmemb = nlower;
 	} else {
 		if (nlower >= THRESH)
 			quick_sort(bot, nlower, size, compar);
 		else {
 			SORT(bot, nlower);
 			if (nupper < THRESH) {
 				SORT(mid, nupper);
 				return;
 			}
 		}
 		bot = mid;
 		nmemb = nupper;
 	}
 	goto partition;
 	/* NOTREACHED */
 }
 static void
 insertion_sort(bot, nmemb, size, compar)
 	char *bot;
 	register int size;
 	int nmemb, (*compar)();
 {
 	register int cnt;
 	register u_char ch;
 	register char *s1, *s2, *t1, *t2, *top;
 	/*
 	 * A simple insertion sort (see Knuth, Vol. 3, page 81, Algorithm
 	 * S).  Insertion sort has the same worst case as most simple sorts
 	 * (O N^2).  It gets used here because it is (O N) in the case of
 	 * sorted data.
 	 */
 	top = bot + nmemb * size;
 	for (t1 = bot + size; t1 < top;) {
 		for (t2 = t1; (t2 -= size) >= bot && compar(t1, t2) < 0;);
 		if (t1 != (t2 += size)) {
 			/* Bubble bytes up through each element. */
 			for (cnt = size; cnt--; ++t1) {
 				ch = *t1;
 				for (s1 = s2 = t1; (s2 -= size) >= t2; s1 = s2)
 					*s1 = *s2;
 				*s1 = ch;
 			}
 		} else
 			t1 += size;
 	}
 /*		qsort(pn - r, r / es, es, cmp);*/
 }
--- a/lib/libc/stdlib/radixsort.3
+++ b/lib/libc/stdlib/radixsort.3
@ -1,5 +1,5 @@
-.\" Copyright (c) 1990, 1991 The Regents of the University of California.
+.\" Copyright (c) 1990, 1991, 1993
-.\" All rights reserved.
+.\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
@ -29,10 +29,10 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.\"     from: @(#)radixsort.3	5.5 (Berkeley) 4/19/91
+.\"     from: @(#)radixsort.3	8.2 (Berkeley) 1/27/94
-.\"	$Id: radixsort.3,v 1.2 1993/08/01 07:44:21 mycroft Exp $
+.\"	$Id: radixsort.3,v 1.3 1994/06/16 05:26:40 mycroft Exp $
 .\"
-.Dd April 19, 1991
+.Dd January 27, 1994
 .Dt RADIXSORT 3
 .Os
 .Sh NAME
@ -42,27 +42,23 @@
 .Fd #include <limits.h>
 .Fd #include <stdlib.h>
 .Ft int
-.Fn radixsort "u_char **base" "int nmemb" "u_char *table" "u_char endbyte"
+.Fn radixsort "u_char **base" "int nmemb" "u_char *table" "u_int endbyte"
 .Ft int
 .Fn sradixsort "u_char **base" "int nmemb" "u_char *table" "u_int endbyte"
 .Sh DESCRIPTION
 The
 .Fn radixsort
-function
+and
-is a modified radix sort.
+.Fn sradixsort
 functions
 are implementations of radix sort.
 .Pp
-The
+These functions sort an array of pointers to byte strings, the initial
-.Fn radixsort
+member of which is referenced by
 function sorts an array of
 .Fa nmemb
 pointers to byte strings, the initial member of which is referenced
 by
 .Fa base .
 The byte strings may contain any values; the end of each string
 is denoted by the user-specified value
 .Fa endbyte .
 The contents of the array are sorted in ascending order according
 to the
 .Tn ASCII
 order of the byte strings they reference.
 .Pp
 Applications may specify a sort order by providing the
 .Fa table
@ -74,38 +70,58 @@ must reference an array of
 .Dv UCHAR_MAX
 + 1 bytes which contains the sort
 weight of each possible byte value.
-The end-of-string byte must have a sort weight of 0.
+The end-of-string byte must have a sort weight of 0 or 255
 (for sorting in reverse order).
 More than one byte may have the same sort weight.
 The
 .Fa table
 argument
 is useful for applications which wish to sort different characters
-equally; for example, providing a table with the same weights
+equally, for example, providing a table with the same weights
 for A-Z as for a-z will result in a case-insensitive sort.
 If
 .Fa table
 is NULL, the contents of the array are sorted in ascending order
 according to the
 .Tn ASCII
 order of the byte strings they reference and
 .Fa endbyte
 has a sorting weight of 0.
 .Pp
 The
 .Fn sradixsort
 function is stable, that is, if two elements compare as equal, their
 order in the sorted array is unchanged.
 The
 .Fn sradixsort
 function uses additional memory sufficient to hold
 .Fa nmemb
 pointers.
 .Pp
 The
 .Fn radixsort
-function
+function is not stable, but uses no additional memory.
 is stable, that is, if two elements compare as equal, their order in
 the sorted array is unchanged.
 .Pp
-The
+These functions are variants of most-significant-byte radix sorting; in
-.Fn radixsort
+particular, see D.E. Knuth's Algorithm R and section 5.2.5, exercise 10.
-function
+They take linear time relative to the number of bytes in the strings.
 is a variant of most-significant-byte radix sorting; in particular, see
 D.E. Knuth's Algorithm R and section 5.2.5, exercise 10.
 The
 .Fn radixsort
 function
 takes linear time relative to the number of bytes in the strings.
 .Sh RETURN VALUES
 Upon successful completion 0 is returned.
 Otherwise, \-1 is returned and the global variable 
 .Va errno
 is set to indicate the error.
 .Sh ERRORS
-The
+.Bl -tag -width Er
-.Fn radixsort
+.It Bq Er EINVAL
 The value of the
 .Fa endbyte
 element of
 .Fa table
 is not 0 or 255.
 .El
 .Pp
 Additionally, the
 .Fn sradixsort
 function
 may fail and set
 .Va errno
@ -131,14 +147,15 @@ for any of the errors specified for the library routine
 .%V Vol. 16
 .%N No. 6
 .Re
 .Rs
 .%A McIlroy, P.
 .%D 1993
 .%B "Engineering Radix Sort"
 .%T "Computing Systems"
 .%V Vol. 6:1
 .%P pp. 5-27
 .Re
 .Sh HISTORY
 The
 .Fn radixsort
-function is
+function first appeared in 4.4BSD.
 .Ud .
 .Sh BUGS
 The
 .Fa nmemb
 argument
 must be less than the maximum integer,
 .Dv INT_MAX .
--- a/lib/libc/stdlib/radixsort.c
+++ b/lib/libc/stdlib/radixsort.c
@ -1,6 +1,9 @@
 /*-
- * Copyright (c) 1990 The Regents of the University of California.
+ * Copyright (c) 1990, 1993
- * All rights reserved.
+ *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Peter McIlroy and by Dan Bernstein at New York University, 
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
@ -32,259 +35,285 @@
 */
 #if defined(LIBC_SCCS) && !defined(lint)
-/*static char *sccsid = "from: @(#)radixsort.c	5.7 (Berkeley) 2/23/91";*/
+/*static char sccsid[] = "from: @(#)radixsort.c	8.1 (Berkeley) 6/4/93";*/
-static char *rcsid = "$Id: radixsort.c,v 1.3 1993/08/26 00:48:07 jtc Exp $";
+static char *rcsid = "$Id: radixsort.c,v 1.4 1994/06/16 05:26:44 mycroft Exp $";
 #endif /* LIBC_SCCS and not lint */
 /*
 * Radixsort routines.
 * 
 * Program r_sort_a() is unstable but uses O(logN) extra memory for a stack.
 * Use radixsort(a, n, tab, endch) for this case.
 * 
 * For stable sorting (using N extra pointers) use sradixsort(), which calls
 * r_sort_b().
 * 
 * For a description of this code, see D. McIlroy, P. McIlroy, K. Bostic,
 * "Engineering Radix Sort".
 */
 #include <sys/types.h>
 #include <limits.h>
 #include <stdlib.h>
 #include <stddef.h>
-#include <string.h>
+#include <errno.h>
-/*
+typedef struct {
- * __rspartition is the cutoff point for a further partitioning instead
+	const u_char **sa;
- * of a shellsort.  If it changes check __rsshell_increments.  Both of
+	int sn, si;
- * these are exported, as the best values are data dependent.
+} stack;
 */
 #define	NPARTITION	40
 int __rspartition = NPARTITION;
 int __rsshell_increments[] = { 4, 1, 0, 0, 0, 0, 0, 0 };
-/*
+static inline void simplesort
- * Stackp points to context structures, where each structure schedules a
+	    __P((const u_char **, int, int, const u_char *, u_int));
- * partitioning.  Radixsort exits when the stack is empty.
+static void r_sort_a __P((const u_char **, int, int, const u_char *, u_int));
- *
+static void r_sort_b __P((const u_char **,
- * If the buckets are placed on the stack randomly, the worst case is when
+	    const u_char **, int, int, const u_char *, u_int));
 * all the buckets but one contain (npartitions + 1) elements and the bucket
 * pushed on the stack last contains the rest of the elements.  In this case,
 * stack growth is bounded by:
 *
 *	limit = (nelements / (npartitions + 1)) - 1;
 *
 * This is a very large number, 52,377,648 for the maximum 32-bit signed int.
 *
 * By forcing the largest bucket to be pushed on the stack first, the worst
 * case is when all but two buckets each contain (npartitions + 1) elements,
 * with the remaining elements split equally between the first and last
 * buckets pushed on the stack.  In this case, stack growth is bounded when:
 *
 *	for (partition_cnt = 0; nelements > npartitions; ++partition_cnt)
 *		nelements =
 *		    (nelements - (npartitions + 1) * (nbuckets - 2)) / 2;
 * The bound is:
 *
 *	limit = partition_cnt * (nbuckets - 1);
 *
 * This is a much smaller number, 4590 for the maximum 32-bit signed int.
 */
 #define	NBUCKETS	(UCHAR_MAX + 1)
-typedef struct _stack {
+#define	THRESHOLD	20		/* Divert to simplesort(). */
-	const u_char **bot;
+#define	SIZE		512		/* Default stack size. */
 	int indx, nmemb;
 } CONTEXT;
-#define	STACKPUSH { \
+#define SETUP {								\
-	stackp->bot = p; \
+	if (tab == NULL) {						\
-	stackp->nmemb = nmemb; \
+		tr = tr0;						\
-	stackp->indx = indx; \
+		for (c = 0; c < endch; c++)				\
-	++stackp; \
+			tr0[c] = c + 1;					\
-}
+		tr0[c] = 0;						\
-#define	STACKPOP { \
+		for (c++; c < 256; c++)					\
-	if (stackp == stack) \
+			tr0[c] = c;					\
-		break; \
+		endch = 0;						\
-	--stackp; \
+	} else {							\
-	bot = stackp->bot; \
+		endch = tab[endch];					\
-	nmemb = stackp->nmemb; \
+		tr = tab;						\
-	indx = stackp->indx; \
+		if (endch != 0 && endch != 255) {			\
 			errno = EINVAL;					\
 			return (-1);					\
 		}							\
 	}								\
 }
 /*
 * A variant of MSD radix sorting; see Knuth Vol. 3, page 177, and 5.2.5,
 * Ex. 10 and 12.  Also, "Three Partition Refinement Algorithms", Paige
 * and Tarjan, SIAM J. Comput. Vol. 16, No. 6, December 1987.
 *
 * This uses a simple sort as soon as a bucket crosses a cutoff point,
 * rather than sorting the entire list after partitioning is finished.
 * This should be an advantage.
 *
 * This is pure MSD instead of LSD of some number of MSD, switching to
 * the simple sort as soon as possible.  Takes linear time relative to
 * the number of bytes in the strings.
 */
 int
-#if __STDC__
+radixsort(a, n, tab, endch)
-radixsort(const u_char **l1, int nmemb, const u_char *tab, u_char endbyte)
+	const u_char **a, *tab;
-#else
+	int n;
-radixsort(l1, nmemb, tab, endbyte)
+	u_int endch;
 	const u_char **l1;
 	register int nmemb;
 	const u_char *tab;
 	u_char endbyte;
 #endif
 {
-	register int i, indx, t1, t2;
+	const u_char *tr;
-	register const u_char **l2;
+	int c;
-	register const u_char **p;
+	u_char tr0[256];
 	register const u_char **bot;
 	register const u_char *tr;
 	CONTEXT *stack, *stackp;
 	int c[NBUCKETS + 1], max;
 	u_char ltab[NBUCKETS];
 	static void shellsort();
-	if (nmemb <= 1)
+	SETUP;
-		return(0);
+	r_sort_a(a, n, 0, tr, endch);
 	return (0);
 }
-	/*
+int
-	 * T1 is the constant part of the equation, the number of elements
+sradixsort(a, n, tab, endch)
-	 * represented on the stack between the top and bottom entries.
+	const u_char **a, *tab;
-	 * It doesn't get rounded as the divide by 2 rounds down (correct
+	int n;
-	 * for a value being subtracted).  T2, the nelem value, has to be
+	u_int endch;
-	 * rounded up before each divide because we want an upper bound;
+{
-	 * this could overflow if nmemb is the maximum int.
+	const u_char *tr, **ta;
-	 */
+	int c;
-	t1 = ((__rspartition + 1) * (NBUCKETS - 2)) >> 1;
+	u_char tr0[256];
 	for (i = 0, t2 = nmemb; t2 > __rspartition; i += NBUCKETS - 1)
 		t2 = ((t2 + 1) >> 1) - t1;
 	if (i) {
 		if (!(stack = stackp = (CONTEXT *)malloc(i * sizeof(CONTEXT))))
 			return(-1);
 	} else
 		stack = stackp = NULL;
-	/*
+	SETUP;
-	 * There are two arrays, one provided by the user (l1), and the
+	if (n < THRESHOLD)
-	 * temporary one (l2).  The data is sorted to the temporary stack,
+		simplesort(a, n, 0, tr, endch);
 	 * and then copied back.  The speedup of using index to determine
 	 * which stack the data is on and simply swapping stacks back and
 	 * forth, thus avoiding the copy every iteration, turns out to not
 	 * be any faster than the current implementation.
 	 */
 	if (!(l2 = (const u_char **)malloc(sizeof(u_char *) * nmemb)))
 		return(-1);
 	/*
 	 * Tr references a table of sort weights; multiple entries may
 	 * map to the same weight; EOS char must have the lowest weight.
 	 */
 	if (tab)
 		tr = tab;
 	else {
-		for (t1 = 0, t2 = endbyte; t1 < t2; ++t1)
+		if ((ta = malloc(n * sizeof(a))) == NULL)
-			ltab[t1] = t1 + 1;
+			return (-1);
-		ltab[t2] = 0;
+		r_sort_b(a, ta, n, 0, tr, endch);
-		for (t1 = endbyte + 1; t1 < NBUCKETS; ++t1)
+		free(ta);
 			ltab[t1] = t1;
 		tr = ltab;
 	}
-
+	return (0);
 	/* First sort is entire stack */
 	bot = l1;
 	indx = 0;
 	for (;;) {
 		/* Clear bucket count array */
 		bzero((char *)c, sizeof(c));
 		/*
 		 * Compute number of items that sort to the same bucket
 		 * for this index.
 		 */
 		for (p = bot, i = nmemb; --i >= 0;)
 			++c[tr[(*p++)[indx]]];
 		/*
 		 * Sum the number of characters into c, dividing the temp
 		 * stack into the right number of buckets for this bucket,
 		 * this index.  C contains the cumulative total of keys
 		 * before and included in this bucket, and will later be
 		 * used as an index to the bucket.  c[NBUCKETS] contains
 		 * the total number of elements, for determining how many
 		 * elements the last bucket contains.  At the same time
 		 * find the largest bucket so it gets pushed first.
 		 */
 		for (i = max = t1 = 0, t2 = __rspartition; i <= NBUCKETS; ++i) {
 			if (c[i] > t2) {
 				t2 = c[i];
 				max = i;
 			}
 			t1 = c[i] += t1;
 		}
 		/*
 		 * Partition the elements into buckets; c decrements through
 		 * the bucket, and ends up pointing to the first element of
 		 * the bucket.
 		 */
 		for (i = nmemb; --i >= 0;) {
 			--p;
 			l2[--c[tr[(*p)[indx]]]] = *p;
 		}
 		/* Copy the partitioned elements back to user stack */
 		bcopy(l2, bot, nmemb * sizeof(u_char *));
 		++indx;
 		/*
 		 * Sort buckets as necessary; don't sort c[0], it's the
 		 * EOS character bucket, and nothing can follow EOS.
 		 */
 		for (i = max; i; --i) {
 			if ((nmemb = c[i + 1] - (t1 = c[i])) < 2)
 				continue;
 			p = bot + t1;
 			if (nmemb > __rspartition)
 				STACKPUSH
 			else
 				shellsort(p, indx, nmemb, tr);
 		}
 		for (i = max + 1; i < NBUCKETS; ++i) {
 			if ((nmemb = c[i + 1] - (t1 = c[i])) < 2)
 				continue;
 			p = bot + t1;
 			if (nmemb > __rspartition)
 				STACKPUSH
 			else
 				shellsort(p, indx, nmemb, tr);
 		}
 		/* Break out when stack is empty */
 		STACKPOP
 	}
 	free((char *)l2);
 	free((char *)stack);
 	return(0);
 }
-/*
+#define empty(s)	(s >= sp)
- * Shellsort (diminishing increment sort) from Data Structures and
+#define pop(a, n, i)	a = (--sp)->sa, n = sp->sn, i = sp->si
- * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;
+#define push(a, n, i)	sp->sa = a, sp->sn = n, (sp++)->si = i
- * see also Knuth Vol. 3, page 84.  The increments are selected from
+#define swap(a, b, t)	t = a, a = b, b = t
- * formula (8), page 95.  Roughly O(N^3/2).
+
- */
+/* Unstable, in-place sort. */
-static void
+void
-shellsort(p, indx, nmemb, tr)
+r_sort_a(a, n, i, tr, endch)
-	register u_char **p, *tr;
+	const u_char **a;
-	register int indx, nmemb;
+	int n, i;
 	const u_char *tr;
 	u_int endch;
 {
-	register u_char ch, *s1, *s2;
+	static int count[256], nc, bmin;
-	register int incr, *incrp, t1, t2;
+	register int c;
 	register const u_char **ak, *r;
 	stack s[SIZE], *sp, *sp0, *sp1, temp;
 	int *cp, bigc;
 	const u_char **an, *t, **aj, **top[256];
-	for (incrp = __rsshell_increments; incr = *incrp++;)
+	/* Set up stack. */
-		for (t1 = incr; t1 < nmemb; ++t1)
+	sp = s;
-			for (t2 = t1 - incr; t2 >= 0;) {
+	push(a, n, i);
-				s1 = p[t2] + indx;
+	while (!empty(s)) {
-				s2 = p[t2 + incr] + indx;
+		pop(a, n, i);
-				while ((ch = tr[*s1++]) == tr[*s2] && ch)
+		if (n < THRESHOLD) {
-					++s2;
+			simplesort(a, n, i, tr, endch);
-				if (ch > tr[*s2]) {
+			continue;
-					s1 = p[t2];
+		}
-					p[t2] = p[t2 + incr];
+		an = a + n;
-					p[t2 + incr] = s1;
+
-					t2 -= incr;
+		/* Make character histogram. */
-				} else
+		if (nc == 0) {
-					break;
+			bmin = 255;	/* First occupied bin, excluding eos. */
 			for (ak = a; ak < an;) {
 				c = tr[(*ak++)[i]];
 				if (++count[c] == 1 && c != endch) {
 					if (c < bmin)
 						bmin = c;
 					nc++;
 				}
 			}
 			if (sp + nc > s + SIZE) {	/* Get more stack. */
 				r_sort_a(a, n, i, tr, endch);
 				continue;
 			}
 		}
 		/*
 		 * Set top[]; push incompletely sorted bins onto stack.
 		 * top[] = pointers to last out-of-place element in bins.
 		 * count[] = counts of elements in bins.
 		 * Before permuting: top[c-1] + count[c] = top[c];
 		 * during deal: top[c] counts down to top[c-1].
 		 */
 		sp0 = sp1 = sp;		/* Stack position of biggest bin. */
 		bigc = 2;		/* Size of biggest bin. */
 		if (endch == 0)		/* Special case: set top[eos]. */
 			top[0] = ak = a + count[0];
 		else {
 			ak = a;
 			top[255] = an;
 		}
 		for (cp = count + bmin; nc > 0; cp++) {
 			while (*cp == 0)	/* Find next non-empty pile. */
 				cp++;
 			if (*cp > 1) {
 				if (*cp > bigc) {
 					bigc = *cp;
 					sp1 = sp;
 				}
 				push(ak, *cp, i+1);
 			}
 			top[cp-count] = ak += *cp;
 			nc--;
 		}
 		swap(*sp0, *sp1, temp);	/* Play it safe -- biggest bin last. */
 		/*
 		 * Permute misplacements home.  Already home: everything
 		 * before aj, and in bin[c], items from top[c] on.
 		 * Inner loop:
 		 *	r = next element to put in place;
 		 *	ak = top[r[i]] = location to put the next element.
 		 *	aj = bottom of 1st disordered bin.
 		 * Outer loop:
 		 *	Once the 1st disordered bin is done, ie. aj >= ak,
 		 *	aj<-aj + count[c] connects the bins in a linked list;
 		 *	reset count[c].
 		 */
 		for (aj = a; aj < an;  *aj = r, aj += count[c], count[c] = 0)
 			for (r = *aj;  aj < (ak = --top[c = tr[r[i]]]);)
 				swap(*ak, r, t);
 	}
 }
 /*
  * Stable sort, requiring additional memory.
  *
  * Sorts the n string pointers in a[] by the bytes found at offset i and
  * beyond, using an explicit stack of pending sub-ranges instead of
  * recursing once per byte position.
  *
  *	a	array of n string pointers, sorted in place
  *	ta	scratch array with room for n pointers; each pass copies the
  *		current range here before dealing it back into a[]
  *	n	number of pointers in a[]
  *	i	byte offset examined by the first pass
  *	tr	translation table applied to every byte before it is binned
  *	endch	translated end-of-string value; 0 is special-cased, any
  *		other value is handled as bin 255
  *
  * Ranges shorter than THRESHOLD are handed to simplesort().
  *
  * count[], nc and bmin are static, so the function is not reentrant.
  * They also carry the histogram into the recursive call made when the
  * local stack would overflow: that call sees nc != 0 and skips rebuilding
  * it.
  */
 void
 r_sort_b(a, ta, n, i, tr, endch)
 	const u_char **a, **ta;
 	int n, i;
 	const u_char *tr;
 	u_int endch;
 {
 	static int count[256], nc, bmin;
 	register int c;
 	register const u_char **ak, **ai;
 	/* Sized with SIZE so it always agrees with the overflow test below. */
 	stack s[SIZE], *sp, *sp0, *sp1, temp;
 	const u_char **top[256];
 	int *cp, bigc;

 	sp = s;
 	push(a, n, i);
 	while (!empty(s)) {
 		pop(a, n, i);
 		if (n < THRESHOLD) {
 			simplesort(a, n, i, tr, endch);
 			continue;
 		}

 		/* Make character histogram (unless one is already pending). */
 		if (nc == 0) {
 			bmin = 255;	/* First occupied bin, excluding eos. */
 			/* Walk down from a + n without stepping below a. */
 			for (ak = a + n; ak > a;) {
 				c = tr[(*--ak)[i]];
 				if (++count[c] == 1 && c != endch) {
 					if (c < bmin)
 						bmin = c;
 					nc++;
 				}
 			}
 			/*
 			 * Not enough room for nc more entries: recurse to get
 			 * a fresh stack.  Compared as a count so no pointer
 			 * beyond s + SIZE is ever formed.
 			 */
 			if (nc > (s + SIZE) - sp) {
 				r_sort_b(a, ta, n, i, tr, endch);
 				continue;
 			}
 		}

 		/*
 		 * Set top[] to the end of each bin and push every bin that
 		 * holds more than one element; count[] is cleared as we go.
 		 */
 		sp0 = sp1 = sp;		/* Stack position of biggest bin. */
 		bigc = 2;		/* Size of biggest bin. */
 		if (endch == 0) {	/* End-of-string bin comes first. */
 			top[0] = ak = a + count[0];
 			count[0] = 0;
 		} else {		/* End-of-string bin comes last. */
 			ak = a;
 			top[255] = a + n;
 			count[255] = 0;
 		}

 		for (cp = count + bmin; nc > 0; cp++) {
 			while (*cp == 0)	/* Find next non-empty pile. */
 				cp++;
 			if ((c = *cp) > 1) {
 				if (c > bigc) {
 					bigc = c;
 					sp1 = sp;
 				}
 				push(ak, c, i+1);
 			}
 			top[cp-count] = ak += c;
 			*cp = 0;			/* Reset count[]. */
 			nc--;
 		}
 		/*
 		 * Move the biggest bin to the lowest slot used by this pass so
 		 * that it is popped last.  Skipped when both point at the same
 		 * slot, which may be unused if nothing was pushed.
 		 */
 		if (sp0 != sp1)
 			swap(*sp0, *sp1, temp);

 		for (ak = ta + n, ai = a+n; ak > ta;)	/* Copy to temp. */
 			*--ak = *--ai;
 		/*
 		 * Deal to piles.  The copy is read back to front and each pile
 		 * is filled from its top downwards, which keeps elements with
 		 * equal bytes in their original relative order.
 		 */
 		for (ak = ta + n; ak > ta;) {
 			--ak;
 			*--top[tr[(*ak)[i]]] = *ak;
 		}
 	}
 }
 /*
  * Insertion sort of the n string pointers in a[].
  *
  * Strings are compared from byte offset b onwards, one byte at a time,
  * after mapping each byte through the table tr.  The scan of a pair stops
  * at the first position where the translated bytes differ, or where the
  * byte of the element being inserted translates to endch.  An element
  * moves down one slot only while it compares strictly less than its
  * predecessor, so elements that compare equal keep their existing order.
  */
 static inline void
 simplesort(a, n, b, tr, endch)
 	register const u_char **a;
 	int n, b;
 	register const u_char *tr;
 	u_int endch;
 {
 	register u_char ch;
 	const u_char **cur, **slot, *lhs, *rhs, *held;

 	for (cur = a + 1; --n >= 1; cur++) {
 		slot = cur;
 		while (slot > a) {
 			lhs = slot[0] + b;
 			rhs = slot[-1] + b;
 			/* Skip the prefix the two strings have in common. */
 			while ((ch = tr[*lhs]) != endch && ch == tr[*rhs]) {
 				lhs++;
 				rhs++;
 			}
 			/* Already in order with respect to its predecessor. */
 			if (ch >= tr[*rhs])
 				break;
 			held = slot[0];
 			slot[0] = slot[-1];
 			slot[-1] = held;
 			slot--;
 		}
 	}
 }