250 lines
7.9 KiB
C
250 lines
7.9 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
│ Copyright (C) 2011 by Valentin Ochs │
|
|
│ │
|
|
│ Permission is hereby granted, free of charge, to any person obtaining a copy │
|
|
│ of this software and associated documentation files (the "Software"), to │
|
|
│ deal in the Software without restriction, including without limitation the │
|
|
│ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or │
|
|
│ sell copies of the Software, and to permit persons to whom the Software is │
|
|
│ furnished to do so, subject to the following conditions: │
|
|
│ │
|
|
│ The above copyright notice and this permission notice shall be included in │
|
|
│ all copies or substantial portions of the Software. │
|
|
│ │
|
|
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR │
|
|
│ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, │
|
|
│ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE │
|
|
│ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER │
|
|
│ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING │
|
|
│ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS │
|
|
│ IN THE SOFTWARE. │
|
|
└─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/alg/alg.h"
|
|
#include "libc/assert.h"
|
|
#include "libc/nexgen32e/bsf.h"
|
|
#include "libc/str/str.h"
|
|
|
|
asm(".ident\t\"\\n\\n\
|
|
Smoothsort (MIT License)\\n\
|
|
Copyright 2011 Valentin Ochs\\n\
|
|
Discovered by Edsger Dijkstra\"");
|
|
asm(".include \"libc/disclaimer.inc\"");
|
|
|
|
/* Comparator used internally: two element pointers plus the caller's
   opaque argument, which is passed through unchanged on every call. */
typedef int (*cmpfun)(const void *, const void *, void *);
|
|
|
|
/**
 * Returns bsf(x) for nonzero x, or 0 when x is zero.
 *
 * The zero test means bsf() is never invoked with a zero operand.
 * NOTE(review): bsf() comes from libc/nexgen32e/bsf.h; presumably it
 * yields the index of the lowest set bit -- confirm there.
 */
forceinline unsigned bsfz0(unsigned x) {
  return x ? bsf(x) : 0;
}
|
|
|
|
/**
 * Counts how far the two-word mask p must be shifted right to bring its
 * next set bit (ignoring bit 0 of p[0]) down to bit 0.
 *
 * The low word is examined first via bsfz0(p[0] - 1). If that yields
 * nothing, the high word is examined and the word width is added.
 *
 * The high word is tested against zero explicitly. Deciding "is the high
 * word empty" from bsfz0(p[1]) alone cannot tell p[1] == 0 apart from a
 * p[1] whose bit 0 is set (both give 0), which made the function return
 * 0 instead of the word width for masks such as {1, 1}.
 *
 * @param p is the mask, p[0] being the low word; p[0] must be nonzero
 * @return shift distance, or 0 if no bit other than bit 0 of p[0] is set
 */
forceinline unsigned pntz(unsigned p[2]) {
  unsigned r;
  assert(p[0] != 0);
  r = bsfz0(p[0] - 1);
  if (r != 0) {
    return r;
  }
  if (p[1] != 0) {
    /* p[1] is nonzero here, so bsfz0() reports a genuine bit index,
       including index 0. */
    return 8 * sizeof(unsigned) + bsfz0(p[1]);
  }
  return 0;
}
|
|
|
|
/**
 * Rotates the contents of n equally sized memory regions by one position.
 *
 * After the call, region i holds what region i + 1 held before, and the
 * last region holds what region 0 held. The copy proceeds in chunks of at
 * most 256 bytes through a stack buffer.
 *
 * Side effects on ar[]: each of ar[0..n-1] is advanced by width bytes, and
 * ar[n] is overwritten with the address of the local scratch buffer, so
 * the array must have room for n + 1 entries and must be re-populated
 * before reuse. Nothing is touched when n < 2.
 *
 * @param width is the size in bytes of each region
 * @param ar holds the n region addresses plus one spare slot
 * @param n is the number of regions to rotate
 */
static void cycle(size_t width, unsigned char *ar[], size_t n) {
  unsigned char scratch[256];
  unsigned k, chunk;
  if (n >= 2) {
    ar[n] = scratch;
    for (; width != 0; width -= chunk) {
      chunk = width > sizeof(scratch) ? sizeof(scratch) : width;
      /* Park the head region's chunk, then slide every region down. */
      memcpy(ar[n], ar[0], chunk);
      for (k = 0; k < n; ++k) {
        memcpy(ar[k], ar[k + 1], chunk);
        ar[k] += chunk;
      }
    }
  }
}
|
|
|
|
/**
 * Shifts the two-word value p left by n bits in place.
 *
 * p[0] is the low word and p[1] the high word; bits leaving the top of
 * p[0] are carried into p[1], bits leaving the top of p[1] are lost.
 *
 * A shift of one whole word or more is performed as a word move followed
 * by a shift of the remainder. When the remainder is zero the bit-shift
 * stage is skipped: running it would evaluate p[0] >> (word width), and
 * shifting by the full width of the type is undefined behaviour.
 *
 * @param p is the value to shift
 * @param n is the shift distance; must be greater than zero and less
 *     than twice the bit width of unsigned
 */
forceinline void shl(unsigned p[2], size_t n) {
  assert(n > 0);
  if (n >= CHAR_BIT * sizeof(unsigned)) {
    n -= CHAR_BIT * sizeof(unsigned);
    p[1] = p[0];
    p[0] = 0;
  }
  if (n != 0) {
    p[1] <<= n;
    p[1] |= p[0] >> (sizeof(unsigned) * CHAR_BIT - n);
    p[0] <<= n;
  }
}
|
|
|
|
/**
 * Shifts the two-word value p right by n bits in place.
 *
 * p[0] is the low word and p[1] the high word; bits leaving the bottom
 * of p[1] are carried into p[0], bits leaving the bottom of p[0] are lost.
 *
 * A shift of one whole word or more is performed as a word move followed
 * by a shift of the remainder. When the remainder is zero the bit-shift
 * stage is skipped: running it would evaluate p[1] << (word width), and
 * shifting by the full width of the type is undefined behaviour.
 *
 * @param p is the value to shift
 * @param n is the shift distance; must be greater than zero and less
 *     than twice the bit width of unsigned
 */
forceinline void shr(unsigned p[2], size_t n) {
  assert(n > 0);
  if (n >= CHAR_BIT * sizeof(unsigned)) {
    n -= CHAR_BIT * sizeof(unsigned);
    p[0] = p[1];
    p[1] = 0;
  }
  if (n != 0) {
    p[0] >>= n;
    p[0] |= p[1] << (sizeof(unsigned) * CHAR_BIT - n);
    p[1] >>= n;
  }
}
|
|
|
|
static void sift(unsigned char *head, cmpfun cmp, void *arg, int pshift,
|
|
unsigned char *ar[hasatleast 14 * sizeof(unsigned) + 1],
|
|
unsigned lp[hasatleast 12 * sizeof(unsigned)], size_t width) {
|
|
unsigned i;
|
|
unsigned char *rt, *lf;
|
|
i = 1;
|
|
ar[0] = head;
|
|
while (pshift > 1) {
|
|
rt = head - width;
|
|
lf = head - width - lp[pshift - 2];
|
|
if ((*cmp)(ar[0], lf, arg) >= 0 && (*cmp)(ar[0], rt, arg) >= 0) {
|
|
break;
|
|
}
|
|
if ((*cmp)(lf, rt, arg) >= 0) {
|
|
ar[i++] = lf;
|
|
head = lf;
|
|
pshift -= 1;
|
|
} else {
|
|
ar[i++] = rt;
|
|
head = rt;
|
|
pshift -= 2;
|
|
}
|
|
}
|
|
cycle(width, ar, i);
|
|
}
|
|
|
|
static void trinkle(unsigned char *head, cmpfun cmp, void *arg, unsigned pp[2],
|
|
unsigned char *ar[hasatleast 14 * sizeof(unsigned) + 1],
|
|
unsigned lp[hasatleast 12 * sizeof(unsigned)], size_t width,
|
|
int pshift, int trusty) {
|
|
unsigned p[2];
|
|
unsigned i, trail;
|
|
unsigned char *stepson, *rt, *lf;
|
|
i = 1;
|
|
p[0] = pp[0];
|
|
p[1] = pp[1];
|
|
ar[0] = head;
|
|
while (p[0] != 1 || p[1] != 0) {
|
|
stepson = head - lp[pshift];
|
|
if ((*cmp)(stepson, ar[0], arg) <= 0) {
|
|
break;
|
|
}
|
|
if (!trusty && pshift > 1) {
|
|
rt = head - width;
|
|
lf = head - width - lp[pshift - 2];
|
|
if ((*cmp)(rt, stepson, arg) >= 0 || (*cmp)(lf, stepson, arg) >= 0) {
|
|
break;
|
|
}
|
|
}
|
|
ar[i++] = stepson;
|
|
head = stepson;
|
|
trail = pntz(p);
|
|
shr(p, trail);
|
|
pshift += trail;
|
|
trusty = 0;
|
|
}
|
|
if (!trusty) {
|
|
cycle(width, ar, i);
|
|
sift(head, cmp, arg, pshift, ar, lp, width);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Smoothsort is an adaptive linearithmic sorting algorithm that's
|
|
* nearly linear on mostly-sorted data, and consumes constant memory.
|
|
*/
|
|
static noinline void smoothsort(
|
|
void *base, size_t count, size_t width, cmpfun cmp, void *arg,
|
|
unsigned lp[hasatleast 12 * sizeof(unsigned)],
|
|
unsigned char *ar[hasatleast 14 * sizeof(unsigned) + 1]) {
|
|
unsigned i, size = width * count;
|
|
unsigned char *head, *high;
|
|
unsigned p[2] = {1, 0};
|
|
unsigned pshift = 1;
|
|
unsigned trail;
|
|
if (!size) return;
|
|
head = (unsigned char *)base;
|
|
high = head + size - width;
|
|
/* Precompute Leonardo numbers, scaled by element width */
|
|
for (lp[0] = lp[1] = width, i = 2;
|
|
(lp[i] = lp[i - 2] + lp[i - 1] + width) < size; i++) {
|
|
}
|
|
while (head < high) {
|
|
if ((p[0] & 3) == 3) {
|
|
sift(head, cmp, arg, pshift, ar, lp, width);
|
|
shr(p, 2);
|
|
pshift += 2;
|
|
} else {
|
|
if (lp[pshift - 1] >= high - head) {
|
|
trinkle(head, cmp, arg, p, ar, lp, width, pshift, 0);
|
|
} else {
|
|
sift(head, cmp, arg, pshift, ar, lp, width);
|
|
}
|
|
if (pshift == 1) {
|
|
shl(p, 1);
|
|
pshift = 0;
|
|
} else {
|
|
shl(p, pshift - 1);
|
|
pshift = 1;
|
|
}
|
|
}
|
|
p[0] |= 1;
|
|
head += width;
|
|
}
|
|
trinkle(head, cmp, arg, p, ar, lp, width, pshift, 0);
|
|
while (pshift != 1 || p[0] != 1 || p[1] != 0) {
|
|
if (pshift <= 1) {
|
|
trail = pntz(p);
|
|
shr(p, trail);
|
|
pshift += trail;
|
|
} else {
|
|
shl(p, 2);
|
|
pshift -= 2;
|
|
p[0] ^= 7;
|
|
shr(p, 1);
|
|
trinkle(head - lp[pshift] - width, cmp, arg, p, ar, lp, width, pshift + 1,
|
|
1);
|
|
shl(p, 1);
|
|
p[0] |= 1;
|
|
trinkle(head - width, cmp, arg, p, ar, lp, width, pshift, 1);
|
|
}
|
|
head -= width;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Sorts array.
|
|
*
|
|
* @param base points to an array to sort in-place
|
|
* @param count is the item count
|
|
* @param width is the size of each item
|
|
* @param cmp is a callback returning <0, 0, or >0
|
|
* @param arg will optionally be passed as the third argument to cmp
|
|
* @see qsort()
|
|
*/
|
|
void qsort_r(void *base, size_t count, size_t width, cmpfun cmp, void *arg) {
|
|
unsigned lp[12 * sizeof(unsigned)];
|
|
unsigned char *ar[14 * sizeof(unsigned) + 1];
|
|
smoothsort(base, count, width, (cmpfun)cmp, arg, lp, ar);
|
|
}
|
|
|
|
/**
 * Adapts a two-argument comparator to the three-argument shape that
 * qsort_r() expects.
 *
 * The real comparator travels through the opaque argument, so it is
 * always invoked through a pointer of its own type. Calling a
 * two-parameter function through a three-parameter function pointer
 * type is undefined behaviour in ISO C.
 *
 * @param a points to the first element
 * @param b points to the second element
 * @param cmp is the two-argument comparator, carried as void *
 * @return whatever the wrapped comparator returns
 */
static int qsort_adapt_cmp(const void *a, const void *b, void *cmp) {
  return ((int (*)(const void *, const void *))cmp)(a, b);
}

/**
 * Sorts array.
 *
 * @param base points to an array to sort in-place
 * @param count is the item count
 * @param width is the size of each item
 * @param cmp is a callback returning <0, 0, or >0
 * @see qsort_r()
 */
void qsort(void *base, size_t count, size_t width,
           int cmp(const void *, const void *)) {
  /* The function-to-object pointer conversion relies on the common
     extension that POSIX systems support. */
  qsort_r(base, count, width, qsort_adapt_cmp, (void *)cmp);
}
|