Implement naive powersort
This commit is contained in:
parent
62822051a9
commit
6c41c32949
@ -5,7 +5,7 @@
|
|||||||
#include <kuroko/util.h>
|
#include <kuroko/util.h>
|
||||||
|
|
||||||
#if defined(__TINYC__) || (defined(_MSC_VER) && !defined(__clang__))
|
#if defined(__TINYC__) || (defined(_MSC_VER) && !defined(__clang__))
|
||||||
int __builtin_clz(unsigned int x) {
|
static int __builtin_clz(unsigned int x) {
|
||||||
int i = 31;
|
int i = 31;
|
||||||
while (!(x & (1 << i)) && i >= 0) i--;
|
while (!(x & (1 << i)) && i >= 0) i--;
|
||||||
return 31-i;
|
return 31-i;
|
||||||
|
348
src/obj_list.c
348
src/obj_list.c
@ -416,79 +416,323 @@ KRK_Method(list,copy) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @brief In-place reverse a value array. */
|
||||||
|
static void reverse_values(KrkValue * values, size_t n) {
|
||||||
|
KrkValue * end = values + n - 1;
|
||||||
|
while (values < end) {
|
||||||
|
krk_currentThread.scratchSpace[0] = *values;
|
||||||
|
*values = *end;
|
||||||
|
*end = krk_currentThread.scratchSpace[0];
|
||||||
|
values++;
|
||||||
|
end--;
|
||||||
|
}
|
||||||
|
krk_currentThread.scratchSpace[0] = NONE_VAL();
|
||||||
|
}
|
||||||
|
|
||||||
KRK_Method(list,reverse) {
|
KRK_Method(list,reverse) {
|
||||||
METHOD_TAKES_NONE();
|
METHOD_TAKES_NONE();
|
||||||
pthread_rwlock_wrlock(&self->rwlock);
|
pthread_rwlock_wrlock(&self->rwlock);
|
||||||
for (size_t i = 0; i < (self->values.count) / 2; i++) {
|
if (self->values.count > 1) reverse_values(self->values.values, self->values.count);
|
||||||
KrkValue tmp = self->values.values[i];
|
|
||||||
self->values.values[i] = self->values.values[self->values.count-i-1];
|
|
||||||
self->values.values[self->values.count-i-1] = tmp;
|
|
||||||
}
|
|
||||||
pthread_rwlock_unlock(&self->rwlock);
|
pthread_rwlock_unlock(&self->rwlock);
|
||||||
return NONE_VAL();
|
return NONE_VAL();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct SortSlice {
|
||||||
|
KrkValue * keys;
|
||||||
|
KrkValue * values;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct SliceAndPower {
|
||||||
|
struct SortSlice begin;
|
||||||
|
size_t power;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Run {
|
||||||
|
struct SortSlice start;
|
||||||
|
struct SortSlice end;
|
||||||
|
size_t power;
|
||||||
|
};
|
||||||
|
|
||||||
|
/** @brief s++ */
|
||||||
|
static inline void slice_advance(struct SortSlice * slice) {
|
||||||
|
slice->keys++;
|
||||||
|
if (slice->values) slice->values++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @brief s-- */
|
||||||
|
static inline void slice_decrement(struct SortSlice * slice) {
|
||||||
|
slice->keys--;
|
||||||
|
if (slice->values) slice->values--;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* @brief s + 1 */
|
||||||
|
static struct SortSlice slice_next(struct SortSlice slice) {
|
||||||
|
return (struct SortSlice){slice.keys + 1, slice.values ? slice.values + 1 : NULL};
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @brief s + n */
|
||||||
|
static struct SortSlice slice_plus(struct SortSlice slice, ssize_t n) {
|
||||||
|
return (struct SortSlice){slice.keys + n, slice.values ? slice.values + n : NULL};
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @brief Copy start-end to buffer */
|
||||||
|
static void copy_slice(struct SortSlice start, struct SortSlice end, struct SortSlice buffer) {
|
||||||
|
while (start.keys != end.keys) {
|
||||||
|
*buffer.keys = *start.keys;
|
||||||
|
if (buffer.values) *buffer.values = *start.values;
|
||||||
|
slice_advance(&start);
|
||||||
|
slice_advance(&buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @brief Very strictly a < b */
|
||||||
static int _list_sorter(KrkValue a, KrkValue b) {
|
static int _list_sorter(KrkValue a, KrkValue b) {
|
||||||
KrkValue ltComp = krk_operator_lt(a,b);
|
KrkValue comp = krk_operator_lt(a,b);
|
||||||
if (IS_NONE(ltComp) || (IS_BOOLEAN(ltComp) && AS_BOOLEAN(ltComp))) return -1;
|
return (IS_NONE(comp) || (IS_BOOLEAN(comp) && AS_BOOLEAN(comp)));
|
||||||
KrkValue gtComp = krk_operator_gt(a,b);
|
|
||||||
if (IS_NONE(gtComp) || (IS_BOOLEAN(gtComp) && AS_BOOLEAN(gtComp))) return 1;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void list_swap(KrkList *list, size_t i, size_t j) {
|
/** @brief While next is strictly < current, advance current */
|
||||||
krk_currentThread.scratchSpace[0] = list->values.values[i];
|
static struct SortSlice powersort_strictlyDecreasingPrefix(struct SortSlice begin, struct SortSlice end) {
|
||||||
list->values.values[i] = list->values.values[j];
|
while (begin.keys + 1 < end.keys && _list_sorter(*(begin.keys + 1), *begin.keys)) slice_advance(&begin);
|
||||||
list->values.values[j] = krk_currentThread.scratchSpace[0];
|
return slice_next(begin);
|
||||||
krk_currentThread.scratchSpace[0] = NONE_VAL();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int partition(KrkList *list, KrkValue key, int reverse, ssize_t lo, ssize_t hi, ssize_t *lt, ssize_t *gt) {
|
/** @brief While next is greater than or equal to current, advance current */
|
||||||
/* Create key from pivot */
|
static struct SortSlice powersort_weaklyIncreasingPrefix(struct SortSlice begin, struct SortSlice end) {
|
||||||
if (!IS_NONE(key)) krk_push(key);
|
while (begin.keys + 1 < end.keys && !_list_sorter(*(begin.keys + 1), *begin.keys)) slice_advance(&begin);
|
||||||
krk_push(list->values.values[(lo+hi)/2]);
|
return slice_next(begin);
|
||||||
if (!IS_NONE(key)) krk_push(krk_callStack(1));
|
}
|
||||||
|
|
||||||
ssize_t _lt = lo;
|
/**
|
||||||
ssize_t _eq = lo;
|
* @brief Extend a run to the right
|
||||||
ssize_t _gt = hi;
|
*
|
||||||
|
* Returns a slice pointing at the end of the run after extended it to the right.
|
||||||
|
* The resulting run consists of strictly ordered (a <= b, b > a) entries. We also
|
||||||
|
* handle reverse runs by reversing them in-place.
|
||||||
|
*
|
||||||
|
* @param begin Start of run
|
||||||
|
* @param end End of available input to scan; always end of list.
|
||||||
|
* @returns Slice pointing to end of run
|
||||||
|
*/
|
||||||
|
static struct SortSlice powersort_extend_and_reverse_right(struct SortSlice begin, struct SortSlice end) {
|
||||||
|
struct SortSlice j = begin;
|
||||||
|
if (j.keys == end.keys) return j;
|
||||||
|
if (j.keys + 1 == end.keys) return slice_next(j);
|
||||||
|
if (_list_sorter(*slice_next(j).keys, *j.keys)) {
|
||||||
|
/* If next is strictly less than current, begin a reversed chain; we already know
|
||||||
|
* we can advance by one, so do that before continuing to save a comparison. */
|
||||||
|
j = powersort_strictlyDecreasingPrefix(slice_next(begin), end);
|
||||||
|
reverse_values(begin.keys, j.keys - begin.keys);
|
||||||
|
if (begin.values) reverse_values(begin.values, j.values - begin.values);
|
||||||
|
} else {
|
||||||
|
/* Weakly increasing means j+1 >= j; continue with that chain*/
|
||||||
|
j = powersort_weaklyIncreasingPrefix(slice_next(begin), end);
|
||||||
|
}
|
||||||
|
return j;
|
||||||
|
}
|
||||||
|
|
||||||
while (_eq <= _gt) {
|
#if defined(__TINYC__) || (defined(_MSC_VER) && !defined(__clang__))
/**
 * @brief Fallback count-leading-zeros for compilers without the builtin.
 *
 * Assumes a 32-bit unsigned int.
 *
 * @param x Value to examine.
 * @returns Number of zero bits above the highest set bit; 32 when x is 0.
 */
static int __builtin_clz(unsigned int x) {
	int i = 31;
	/* Test the index before shifting so x == 0 never shifts by -1, and shift
	 * an unsigned 1 so bit 31 does not overflow a signed int. */
	while (i >= 0 && !(x & (1u << i))) i--;
	return 31-i;
}
#endif
|
||||||
|
|
||||||
int res = _list_sorter(krk_peek(reverse),krk_peek(1-reverse));
|
/**
 * @brief Calculate the power of the boundary between two adjacent runs.
 *
 * Run A is [beginA, beginB) and run B is [beginB, endB), both inside the
 * input [begin, end). l2 and r2 are twice the midpoints of A and B measured
 * from @p begin; scaling by 2^30 / n turns each midpoint into a fraction of
 * the input length in fixed point with 31 fractional bits. The power is the
 * position of the first binary digit at which those two fractions differ,
 * which is what counting the leading zeros of their XOR yields (bit 31 of
 * both is always clear because a midpoint is less than n).
 *
 * The intermediates are explicitly at least 64 bits wide: plain
 * 'unsigned long' is only 32 bits on some targets, where the shift by 30
 * would overflow for all but tiny lists. Lists of 2^33 or more elements
 * would still overflow the shift.
 *
 * @param begin  Index of the start of the whole input.
 * @param end    Index one past the end of the whole input; must be > begin.
 * @param beginA Start index of run A.
 * @param beginB Start index of run B (and end of run A).
 * @param endB   Index one past the end of run B.
 * @returns The power of the A/B boundary.
 */
static size_t powersort_power(size_t begin, size_t end, size_t beginA, size_t beginB, size_t endB) {
	size_t n = end - begin;
	unsigned long long l2 = (unsigned long long)beginA + beginB - 2ULL * begin;
	unsigned long long r2 = (unsigned long long)beginB + endB - 2ULL * begin;
	unsigned int a = (unsigned int)((l2 << 30) / n);
	unsigned int b = (unsigned int)((r2 << 30) / n);
	unsigned int differing = a ^ b;
	/* The builtin is undefined for 0. If the midpoints are indistinguishable
	 * at this precision, report the deepest power the word can express. */
	if (!differing) return sizeof(unsigned int) * 8;
	return __builtin_clz(differing);
}
|
||||||
|
|
||||||
if (res < 0) {
|
/**
|
||||||
list_swap(list,_eq,_lt);
|
* @brief Merge neighboring runs.
|
||||||
_lt++;
|
*
|
||||||
_eq++;
|
* Merges the neighboring, sorted runs [left, mid) and [mid, right) using the provided
|
||||||
} else if (res > 0) {
|
* buffer space. Specifically, the smaller of the two runs is copied to the buffer, and
|
||||||
list_swap(list,_eq,_gt);
|
* then merging occurs in-place.
|
||||||
_gt--;
|
*
|
||||||
} else {
|
* @param left Start of the first run
|
||||||
_eq++;
|
* @param mid End of first run, start of second run
|
||||||
|
* @param right End of second run
|
||||||
|
* @param buffer Scratch space
|
||||||
|
*/
|
||||||
|
static void powersort_merge(struct SortSlice left, struct SortSlice mid, struct SortSlice right, struct SortSlice buffer) {
|
||||||
|
size_t n1 = mid.keys - left.keys;
|
||||||
|
size_t n2 = right.keys - mid.keys;
|
||||||
|
|
||||||
|
if (n1 <= n2) {
|
||||||
|
copy_slice(left, mid, buffer);
|
||||||
|
struct SortSlice c1 = buffer, e1 = slice_plus(buffer, n1);
|
||||||
|
struct SortSlice c2 = mid, e2 = right, o = left;
|
||||||
|
|
||||||
|
while (c1.keys < e1.keys && c2.keys < e2.keys) {
|
||||||
|
if (!_list_sorter(*c2.keys, *c1.keys)) {
|
||||||
|
*o.keys = *c1.keys;
|
||||||
|
if (o.values) *o.values = *c1.values;
|
||||||
|
slice_advance(&c1);
|
||||||
|
} else {
|
||||||
|
*o.keys = *c2.keys;
|
||||||
|
if (o.values) *o.values = *c2.values;
|
||||||
|
slice_advance(&c2);
|
||||||
|
}
|
||||||
|
slice_advance(&o);
|
||||||
}
|
}
|
||||||
|
|
||||||
krk_pop();
|
while (c1.keys < e1.keys) {
|
||||||
|
*o.keys = *c1.keys;
|
||||||
|
if (o.values) *o.values = *c1.values;
|
||||||
|
slice_advance(&c1);
|
||||||
|
slice_advance(&o);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
copy_slice(mid, right, buffer);
|
||||||
|
|
||||||
|
struct SortSlice c1 = slice_plus(mid, -1), s1 = left, o = slice_plus(right, -1);
|
||||||
|
struct SortSlice c2 = slice_plus(buffer, n2 - 1), s2 = buffer;
|
||||||
|
|
||||||
|
while (c1.keys >= s1.keys && c2.keys >= s2.keys) {
|
||||||
|
if (!_list_sorter(*c2.keys, *c1.keys)) {
|
||||||
|
*o.keys = *c2.keys;
|
||||||
|
if (o.values) *o.values = *c2.values;
|
||||||
|
slice_decrement(&c2);
|
||||||
|
} else {
|
||||||
|
*o.keys = *c1.keys;
|
||||||
|
if (o.values) *o.values = *c1.values;
|
||||||
|
slice_decrement(&c1);
|
||||||
|
}
|
||||||
|
slice_decrement(&o);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (c2.keys >= s2.keys) {
|
||||||
|
*o.keys = *c2.keys;
|
||||||
|
if (o.values) *o.values = *c2.values;
|
||||||
|
slice_decrement(&c2);
|
||||||
|
slice_decrement(&o);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
krk_pop(); /* Pop pivot key. */
|
|
||||||
|
|
||||||
*lt = _lt;
|
|
||||||
*gt = _gt;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void quicksort(KrkList * list, KrkValue key, int reverse, ssize_t lo, ssize_t hi) {
|
/**
|
||||||
if (lo >= 0 && lo < hi) {
|
* @brief Powersort - merge-sort sorted runs
|
||||||
ssize_t lt, gt;
|
*
|
||||||
if (partition(list, key, reverse, lo, hi, <, >)) return;
|
* This is an implementation of Munro-Wild Powersort from the paper at:
|
||||||
quicksort(list, key, reverse, lo, lt - 1);
|
* @ref https://www.wild-inter.net/publications/html/munro-wild-2018.pdf.html
|
||||||
quicksort(list, key, reverse, gt + 1, hi);
|
*
|
||||||
|
* The reference implementation was also a helpful thing to study, and much
|
||||||
|
* of the iteration and merging is based on its use of C++ iterators:
|
||||||
|
* @ref https://github.com/sebawild/powersort
|
||||||
|
*
|
||||||
|
* There's no fancy extensions or improvements here, just the plain approach
|
||||||
|
* set out in the paper, which is probably good enough for us? That means no
|
||||||
|
* extending short runs to a minimum run length, no fancy node power calcs,
|
||||||
|
* just a short bit of extending and merging.
|
||||||
|
*
|
||||||
|
* If the key function raises an exception, no sorting will be attempted
|
||||||
|
* and the exception from the key function will be raised immediately.
|
||||||
|
*
|
||||||
|
* If the values to be sorted can not compare with __lt__, an exception
|
||||||
|
* should be thrown eventually, but the entire list may still be scanned
|
||||||
|
* and the resulting state is undefined.
|
||||||
|
*
|
||||||
|
* @param list List to sort in-place.
|
||||||
|
* @param key Key function, or None to sort values directly.
|
||||||
|
* @param reverse Sort direction, 0 for normal (a[0] <= b[0], etc.), 1 for reversed.
|
||||||
|
*/
|
||||||
|
static void powersort(KrkList * list, KrkValue key, int reverse) {
|
||||||
|
size_t n = list->values.count;
|
||||||
|
struct SortSlice slice = {list->values.values, NULL};
|
||||||
|
|
||||||
|
/* If there is a key function, create a separate array to store
|
||||||
|
* the resulting key values; shove it in a tuple so we can keep
|
||||||
|
* those key values from being garbage collected. */
|
||||||
|
if (!IS_NONE(key)) {
|
||||||
|
KrkTuple * _keys = krk_newTuple(n);
|
||||||
|
krk_push(OBJECT_VAL(_keys));
|
||||||
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
krk_push(key);
|
||||||
|
krk_push(list->values.values[i]);
|
||||||
|
_keys->values.values[i] = krk_callStack(1);
|
||||||
|
_keys->values.count++;
|
||||||
|
|
||||||
|
/* If the key function threw an exception, bail early. */
|
||||||
|
if (krk_currentThread.flags & KRK_THREAD_HAS_EXCEPTION) goto _end_sort;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* values are secondary, keys are what actually gets sorted */
|
||||||
|
slice.values = slice.keys;
|
||||||
|
slice.keys = _keys->values.values;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* We handle reverse sort by reversing, sorting normally, and then reversing again */
|
||||||
|
if (reverse) {
|
||||||
|
reverse_values(slice.keys, n);
|
||||||
|
if (slice.values) reverse_values(slice.values, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Supposedly the absolute maximum for this is strictly less than the number of bits
|
||||||
|
* we can fit in a size_t, so 64 ought to cover us until someone tries porting Kuroko
|
||||||
|
* to one of the 128-bit architectures, but even then I don't think we can handle
|
||||||
|
* holding that many values in a list to begin with.
|
||||||
|
*
|
||||||
|
* stack[0] should always be empty. */
|
||||||
|
struct SliceAndPower stack[64] = {0};
|
||||||
|
int top = 0;
|
||||||
|
|
||||||
|
/* Buffer space for the merges. We shouldn't need anywhere close to this much space,
|
||||||
|
* but best to be safe, and we're already allocating a bunch of space for key tuples */
|
||||||
|
KrkTuple * bufferSpace = krk_newTuple(slice.values ? (n * 2) : n);
|
||||||
|
krk_push(OBJECT_VAL(bufferSpace));
|
||||||
|
for (size_t i = 0; i < bufferSpace->values.capacity; ++i) bufferSpace->values.values[bufferSpace->values.count++] = NONE_VAL();
|
||||||
|
struct SortSlice buffer = {&bufferSpace->values.values[0], slice.values ? &bufferSpace->values.values[n] : NULL};
|
||||||
|
|
||||||
|
/* This just take the role of the C++ iterators in the reference implementaiton */
|
||||||
|
struct SortSlice begin = {slice.keys, slice.values};
|
||||||
|
struct SortSlice end = {slice.keys + n, slice.values ? slice.values + n : NULL};
|
||||||
|
|
||||||
|
/* Our first run starts from the left and extends as far as it can. */
|
||||||
|
struct Run a = {begin, powersort_extend_and_reverse_right(begin,end), 0};
|
||||||
|
|
||||||
|
while (a.end.keys < end.keys) {
|
||||||
|
/* Our next run is whatever is after that, assuming the initial run isn't the whole list. */
|
||||||
|
struct Run b = {a.end, powersort_extend_and_reverse_right(a.end, end), 0};
|
||||||
|
/* I don't really understand the power part of powersort, but whatever. */
|
||||||
|
a.power = powersort_power(0, n, a.start.keys - begin.keys, b.start.keys - begin.keys, b.end.keys - begin.keys);
|
||||||
|
|
||||||
|
/* While the stack has things with higher power, merge them into a */
|
||||||
|
while (stack[top].power > a.power) {
|
||||||
|
struct SliceAndPower top_run = stack[top--];
|
||||||
|
powersort_merge(top_run.begin, a.start, a.end, buffer);
|
||||||
|
a.start = top_run.begin;
|
||||||
|
}
|
||||||
|
/* Put a on top of the stack, and then replace a with b */
|
||||||
|
stack[++top] = (struct SliceAndPower){a.start, a.power};
|
||||||
|
a = (struct Run){b.start, b.end, 0};
|
||||||
|
}
|
||||||
|
|
||||||
|
/* While there are things in the stack (excluding the empty 0 slot), merge them into the last a */
|
||||||
|
while (top > 0) {
|
||||||
|
struct SliceAndPower top_run = stack[top--];
|
||||||
|
powersort_merge(top_run.begin, a.start, end, buffer);
|
||||||
|
a.start = top_run.begin;
|
||||||
|
}
|
||||||
|
|
||||||
|
krk_pop(); /* tuple with buffer space */
|
||||||
|
_end_sort:
|
||||||
|
if (!IS_NONE(key)) krk_pop(); /* keys tuple */
|
||||||
|
|
||||||
|
/* If we reversed at the start, reverse again now as the list is forward-sorted */
|
||||||
|
if (reverse) reverse_values(list->values.values, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
KRK_Method(list,sort) {
|
KRK_Method(list,sort) {
|
||||||
@ -499,7 +743,7 @@ KRK_Method(list,sort) {
|
|||||||
if (self->values.count < 2) return NONE_VAL();
|
if (self->values.count < 2) return NONE_VAL();
|
||||||
|
|
||||||
pthread_rwlock_wrlock(&self->rwlock);
|
pthread_rwlock_wrlock(&self->rwlock);
|
||||||
quicksort(self, key, reverse, 0, self->values.count - 1);
|
powersort(self, key, reverse);
|
||||||
pthread_rwlock_unlock(&self->rwlock);
|
pthread_rwlock_unlock(&self->rwlock);
|
||||||
|
|
||||||
return NONE_VAL();
|
return NONE_VAL();
|
||||||
|
Loading…
Reference in New Issue
Block a user