diff --git a/Makefile b/Makefile index 274fe19d..28fbec6b 100644 --- a/Makefile +++ b/Makefile @@ -60,9 +60,10 @@ # # build/config.mk -SHELL = /bin/sh -HOSTS ?= freebsd openbsd alpine -SANITY := $(shell build/sanitycheck $$PPID) +SHELL = /bin/sh +HOSTS ?= freebsd openbsd alpine +SANITY := $(shell build/sanitycheck $$PPID) +GNUMAKEFLAGS += --output-sync .SUFFIXES: .DELETE_ON_ERROR: @@ -156,7 +157,6 @@ include tool/build/lib/buildlib.mk include tool/build/emucrt/emucrt.mk include tool/build/emubin/emubin.mk include tool/build/build.mk -include tool/debug/debug.mk include tool/decode/lib/decodelib.mk include tool/decode/decode.mk include tool/hash/hash.mk diff --git a/build/compile b/build/compile index 521cc5a8..2cf182ac 100755 --- a/build/compile +++ b/build/compile @@ -276,10 +276,5 @@ if "$@"; then exit 0 fi -if [ "$TERM" = "dumb" ]; then - f='%s %s\r\n\r\n' -else - f='\033[91m%s\033[39m \033[94m%s\033[39m\r\n\r\n' -fi -printf "$f" "$CCNAME $CCVERSION: compile $REASON:" "$*" >&2 +printf "$LOGFMT" "$CCNAME $CCVERSION: compile $REASON:" "$*" >&2 exit 1 diff --git a/build/config.mk b/build/config.mk index 352e74fa..7801beb6 100644 --- a/build/config.mk +++ b/build/config.mk @@ -17,9 +17,6 @@ CONFIG_CCFLAGS += \ $(FTRACE) \ -Og -CONFIG_COPTS += \ - -ftrapv - TARGET_ARCH ?= \ -march=k8-sse3 diff --git a/dsp/mpeg/ycbcrio.c b/dsp/mpeg/ycbcrio.c deleted file mode 100644 index 467e1589..00000000 --- a/dsp/mpeg/ycbcrio.c +++ /dev/null @@ -1,139 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. 
│ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. │ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "dsp/mpeg/mpeg.h" -#include "dsp/mpeg/ycbcrio.h" -#include "libc/bits/bits.h" -#include "libc/calls/calls.h" -#include "libc/calls/struct/stat.h" -#include "libc/log/check.h" -#include "libc/macros.h" -#include "libc/runtime/runtime.h" -#include "libc/str/str.h" -#include "libc/sysv/consts/map.h" -#include "libc/sysv/consts/o.h" -#include "libc/sysv/consts/prot.h" - -static void CheckPlmFrame(const struct plm_frame_t *frame) { - CHECK_NE(0, frame->width); - CHECK_NE(0, frame->height); - CHECK_GE(frame->y.width, frame->width); - CHECK_GE(frame->y.height, frame->height); - CHECK_EQ(frame->cr.width, frame->cb.width); - CHECK_EQ(frame->cr.height, frame->cb.height); - CHECK_EQ(frame->y.width, frame->cr.width * 2); - CHECK_EQ(frame->y.height, frame->cr.height * 2); - CHECK_NOTNULL(frame->y.data); - CHECK_NOTNULL(frame->cr.data); - CHECK_NOTNULL(frame->cb.data); -} - -static size_t GetHeaderBytes(const struct plm_frame_t *frame) { - return MAX(sizeof(struct Ycbcrio), ROUNDUP(frame->y.width, 16)); -} - -static size_t GetPlaneBytes(const struct plm_plane_t *plane) { - /* - * planes must be 16-byte aligned, but due to their hugeness, and the - * recommendation of intel's 6,000 page manual, it makes sense to have - * planes on isolated 64kb frames for multiprocessing. 
- */ - return ROUNDUP(ROUNDUP(plane->height, 16) * ROUNDUP(plane->width, 16), - FRAMESIZE); -} - -static size_t CalcMapBytes(const struct plm_frame_t *frame) { - return ROUNDUP(GetHeaderBytes(frame) + GetPlaneBytes(&frame->y) + - GetPlaneBytes(&frame->cb) + GetPlaneBytes(&frame->cb), - FRAMESIZE); -} - -static void FixupPointers(struct Ycbcrio *map) { - map->frame.y.data = (unsigned char *)map + GetHeaderBytes(&map->frame); - map->frame.cr.data = map->frame.y.data + GetPlaneBytes(&map->frame.y); - map->frame.cb.data = map->frame.cr.data + GetPlaneBytes(&map->frame.cr); -} - -static struct Ycbcrio *YcbcrioOpenNew(const char *path, - const struct plm_frame_t *frame) { - int fd; - size_t size; - struct stat st; - struct Ycbcrio *map; - CheckPlmFrame(frame); - size = CalcMapBytes(frame); - CHECK_NE(-1, (fd = open(path, O_CREAT | O_RDWR, 0644))); - CHECK_NE(-1, fstat(fd, &st)); - if (st.st_size < size) { - CHECK_NE(-1, ftruncate(fd, size)); - } - CHECK_NE(MAP_FAILED, - (map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0))); - map->magic = YCBCRIO_MAGIC; - map->fd = fd; - map->size = size; - memcpy(&map->frame, frame, sizeof(map->frame)); - FixupPointers(map); - memcpy(&map->frame.y.data, frame->y.data, GetPlaneBytes(&frame->y)); - memcpy(&map->frame.cb.data, frame->cb.data, GetPlaneBytes(&frame->cb)); - memcpy(&map->frame.cr.data, frame->cr.data, GetPlaneBytes(&frame->cr)); - return map; -} - -static struct Ycbcrio *YcbcrioOpenExisting(const char *path) { - int fd; - struct stat st; - struct Ycbcrio *map; - CHECK_NE(-1, (fd = open(path, O_RDWR))); - CHECK_NE(-1, fstat(fd, &st)); - CHECK_NE(MAP_FAILED, (map = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0))); - CHECK_EQ(YCBCRIO_MAGIC, map->magic); - CHECK_GE(st.st_size, CalcMapBytes(&map->frame)); - FixupPointers(map); - map->fd = fd; - map->size = st.st_size; - return map; -} - -/** - * Opens shareable persistable MPEG video frame memory. 
- * - * @param path is a file name - * @param frame if NULL means open existing file, otherwise copies new - * @param points to pointer returned by YcbcrioOpen() which is cleared - * @return memory mapping needing YcbcrioClose() - */ -struct Ycbcrio *YcbcrioOpen(const char *path, const struct plm_frame_t *frame) { - if (frame) { - return YcbcrioOpenNew(path, frame); - } else { - return YcbcrioOpenExisting(path); - } -} - -/** - * Closes mapped video frame file. - * - * @param points to pointer returned by YcbcrioOpen() which is cleared - */ -void YcbcrioClose(struct Ycbcrio **map) { - CHECK_NE(-1, close_s(&(*map)->fd)); - CHECK_NE(-1, munmap_s(map, (*map)->size)); -} diff --git a/dsp/mpeg/ycbcrio.h b/dsp/mpeg/ycbcrio.h deleted file mode 100644 index c2a787bb..00000000 --- a/dsp/mpeg/ycbcrio.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef COSMOPOLITAN_DSP_MPEG_YCBCRIO_H_ -#define COSMOPOLITAN_DSP_MPEG_YCBCRIO_H_ -#include "dsp/mpeg/mpeg.h" -#include "libc/bits/bswap.h" -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -#define YCBCRIO_MAGIC bswap_32(0xBCCBCCBCu) - -/** - * Mappable persistable MPEG-2 video frame in Y-Cr-Cb colorspace. 
- */ -struct Ycbcrio { - uint32_t magic; - int32_t fd; - uint64_t size; - plm_frame_t frame; -}; - -struct Ycbcrio *YcbcrioOpen(const char *, const struct plm_frame_t *) - paramsnonnull((1)) vallocesque returnsnonnull; - -void YcbcrioClose(struct Ycbcrio **) paramsnonnull(); - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_DSP_MPEG_YCBCRIO_H_ */ diff --git a/examples/fld.c b/examples/fld.c index fc00cae9..2d753c95 100644 --- a/examples/fld.c +++ b/examples/fld.c @@ -7,6 +7,7 @@ │ • http://creativecommons.org/publicdomain/zero/1.0/ │ ╚─────────────────────────────────────────────────────────────────*/ #endif +#include "libc/bits/bits.h" #include "libc/inttypes.h" #include "libc/literal.h" #include "libc/math.h" @@ -34,7 +35,7 @@ void dobin(const char *op, long double x, FILE *f) { memcpy(buf, &x, sizeof(x)); memcpy(&lo, &buf[0], sizeof(lo)); memcpy(&hi, &buf[8], sizeof(hi)); - fprintf(f, "/\t%016" PRIb16 "%064" PRIb64 " %-8s %19.19Lf\n", hi, lo, op, x); + fprintf(f, "/\t%016" PRIb16 "%064" PRIb64 " %-8s % 19.19Lf\n", hi, lo, op, x); } void dohex(const char *op, long double x, FILE *f) { @@ -44,7 +45,7 @@ void dohex(const char *op, long double x, FILE *f) { memcpy(buf, &x, sizeof(x)); memcpy(&lo, &buf[0], sizeof(lo)); memcpy(&hi, &buf[8], sizeof(hi)); - fprintf(f, "/\t%04" PRIx16 "%016" PRIx64 " %-8s %19.19Lf\n", hi, lo, op, x); + fprintf(f, "/\t%04" PRIx16 "%016" PRIx64 " %-8s % 19.19Lf\n", hi, lo, op, x); } #define DOBIN(OP) \ diff --git a/examples/mappy.c b/examples/mappy.c index 25ac396e..299071ba 100644 --- a/examples/mappy.c +++ b/examples/mappy.c @@ -10,6 +10,7 @@ #include "libc/bits/bits.h" #include "libc/calls/calls.h" #include "libc/macros.h" +#include "libc/runtime/runtime.h" #include "libc/stdio/stdio.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/prot.h" diff --git a/examples/rusage.c b/examples/rusage.c index 0bc0c181..a466859a 100644 --- a/examples/rusage.c +++ b/examples/rusage.c @@ -32,9 
+32,10 @@ int main(int argc, char *argv[]) { int pid, wstatus; long double ts1, ts2; struct rusage rusage; + char pathbuf[PATH_MAX]; memset(&rusage, -1, sizeof(rusage)); CHECK_GT(argc, 1); - CHECK_NOTNULL((exe = commandv(argv[1]))); + CHECK_NOTNULL((exe = commandv(argv[1], pathbuf))); ts1 = nowl(); CHECK_NE(-1, (pid = spawnve(0, NULL, exe, &argv[1], environ))); CHECK_NE(-1, wait4(pid, &wstatus, 0, &rusage)); diff --git a/libc/calls/hefty/access.c b/libc/calls/access.c similarity index 100% rename from libc/calls/hefty/access.c rename to libc/calls/access.c diff --git a/libc/calls/calls.h b/libc/calls/calls.h index 638af166..e843a066 100644 --- a/libc/calls/calls.h +++ b/libc/calls/calls.h @@ -86,7 +86,7 @@ char *realpath(const char *, char *); char *replaceuser(const char *) nodiscard; char *slurp(const char *, size_t *) nodiscard; char *ttyname(int); -const char *commandv(const char *); +char *commandv(const char *, char[hasatleast PATH_MAX]); int access(const char *, int) nothrow; int arch_prctl(); int chdir(const char *); @@ -140,12 +140,8 @@ int mknodat(int, const char *, int32_t, uint64_t); int mlock(const void *, size_t); int mlock2(const void *, size_t, int); int mlockall(int); -int mprotect(void *, uint64_t, int) privileged; -int msync(void *, size_t, int); int munlock(const void *, size_t); int munlockall(void); -int munmap(void *, uint64_t); -int munmap_s(void *, uint64_t); int nice(int); int open(const char *, int, ...) nodiscard; int openanon(char *, unsigned) nodiscard; @@ -229,8 +225,6 @@ uint32_t gettid(void) nosideeffect; uint32_t getuid(void) nosideeffect; uint32_t umask(int32_t); void *getprocaddressmodule(const char *, const char *); -void *mmap(void *, uint64_t, int32_t, int32_t, int32_t, int64_t); -void *mremap(void *, uint64_t, uint64_t, int32_t, void *); #define getcwd(BUF, SIZE) \ (isconstant(BUF) && (&(BUF)[0] == NULL) ? 
get_current_dir_name() \ diff --git a/libc/calls/hefty/commandv.c b/libc/calls/commandv.c similarity index 64% rename from libc/calls/hefty/commandv.c rename to libc/calls/commandv.c index abc2157a..578e7a5f 100644 --- a/libc/calls/hefty/commandv.c +++ b/libc/calls/commandv.c @@ -17,28 +17,18 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/alg/alg.h" #include "libc/bits/progn.h" #include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/conv/conv.h" #include "libc/dce.h" #include "libc/errno.h" -#include "libc/mem/mem.h" #include "libc/nt/ntdll.h" #include "libc/runtime/runtime.h" #include "libc/str/str.h" #include "libc/sysv/consts/ok.h" #include "libc/sysv/errfuns.h" -static struct critbit0 g_commandv; - -textstartup static void g_commandv_init(void) { - __cxa_atexit(critbit0_clear, &g_commandv, NULL); -} - -const void *const g_commandv_ctor[] initarray = {g_commandv_init}; - static int accessexe(char pathname[hasatleast PATH_MAX], size_t len, const char *ext) { len = stpcpy(&pathname[len], ext) - &pathname[0]; @@ -76,21 +66,21 @@ static int accesscmd(char pathname[hasatleast PATH_MAX], const char *path, static int searchcmdpath(char pathname[hasatleast PATH_MAX], const char *name, size_t namelen) { int rc; - char *ep, *path, *pathtok; - struct critbit0 deduplicate; + char *path, *pathtok, ep[PATH_MAX]; rc = -1; - pathtok = ep = - strdup(firstnonnull(getenv("PATH"), "/bin:/usr/local/bin:/usr/bin")); - memset(&deduplicate, 0, sizeof(deduplicate)); + if (!memccpy(ep, + firstnonnull(emptytonull(getenv("PATH")), + "/bin:/usr/local/bin:/usr/bin"), + '\0', sizeof(ep))) { + return enomem(); + } + pathtok = ep; while ((path = strsep(&pathtok, IsWindows() ? 
";" : ":"))) { if (strchr(path, '=')) continue; - if (!critbit0_insert(&deduplicate, path)) continue; if ((rc = accesscmd(pathname, path, name, namelen)) != -1) { break; } } - critbit0_clear(&deduplicate); - free(ep); return rc; } @@ -105,66 +95,29 @@ static char *mkcmdquery(const char *name, size_t namelen, return &scratch[0]; } -static const char *cachecmd(const char *name, size_t namelen, - const char *pathname, size_t pathnamelen) { - size_t entrylen; - char *res, *entry; - if ((entry = malloc((entrylen = namelen + 1 + pathnamelen) + 1))) { - mkcmdquery(name, namelen, entry); - res = memcpy(&entry[namelen + 1], pathname, pathnamelen + 1); - critbit0_emplace(&g_commandv, entry, entrylen); - } else { - res = NULL; - } - return res; -} - -static const char *getcmdcache(const char *name, size_t namelen, - char scratch[hasatleast PATH_MAX]) { - const char *entry; - if ((entry = critbit0_get(&g_commandv, mkcmdquery(name, namelen, scratch)))) { - return &entry[namelen + 1]; - } - return NULL; -} - -noinline static const char *findcmdpath(const char *name, - char pathname[hasatleast PATH_MAX]) { - char *p; - int rc, olderr; - size_t len; - olderr = errno; - if (!(len = strlen(name))) return PROGN(enoent(), NULL); - if (memchr(name, '=', len)) return PROGN(einval(), NULL); - if ((p = getcmdcache(name, len, pathname)) || - (((IsWindows() && - ((rc = accesscmd(pathname, kNtSystemDirectory, name, len)) != -1 || - (rc = accesscmd(pathname, kNtWindowsDirectory, name, len)) != -1)) || - (rc = accesscmd(pathname, "", name, len)) != -1 || - (!strpbrk(name, "/\\") && - (rc = searchcmdpath(pathname, name, len)) != -1)) && - (p = cachecmd(name, len, pathname, rc)))) { - errno = olderr; - return p; - } else { - return NULL; - } -} - /** - * Resolves pathname of executable. - * - * This does the same thing as `command -v` in bourne shell. Path - * lookups are cached for the lifetime of the process. Paths with - * multiple components will skip the resolution process. 
Undotted - * basenames get automatic .com and .exe suffix resolution on all - * platforms. Windows' system directories will always trump PATH. + * Resolves full pathname of executable. * * @return execve()'able path, or NULL w/ errno * @errno ENOENT, EACCES, ENOMEM * @see free(), execvpe() */ -const char *commandv(const char *name) { - char pathname[PATH_MAX]; - return findcmdpath(name, pathname); +char *commandv(const char *name, char pathbuf[hasatleast PATH_MAX]) { + char *p; + size_t len; + int rc, olderr; + olderr = errno; + if (!(len = strlen(name))) return PROGN(enoent(), NULL); + if (memchr(name, '=', len)) return PROGN(einval(), NULL); + if ((IsWindows() && + ((rc = accesscmd(pathbuf, kNtSystemDirectory, name, len)) != -1 || + (rc = accesscmd(pathbuf, kNtWindowsDirectory, name, len)) != -1)) || + (rc = accesscmd(pathbuf, "", name, len)) != -1 || + (!strpbrk(name, "/\\") && + (rc = searchcmdpath(pathbuf, name, len)) != -1)) { + errno = olderr; + return pathbuf; + } else { + return NULL; + } } diff --git a/libc/calls/hefty/faccessat-nt.c b/libc/calls/faccessat-nt.c similarity index 100% rename from libc/calls/hefty/faccessat-nt.c rename to libc/calls/faccessat-nt.c diff --git a/libc/calls/hefty/faccessat.c b/libc/calls/faccessat.c similarity index 100% rename from libc/calls/hefty/faccessat.c rename to libc/calls/faccessat.c diff --git a/libc/calls/g_fds_init.S b/libc/calls/g_fds_init.S index 69b9d40f..658a50a4 100644 --- a/libc/calls/g_fds_init.S +++ b/libc/calls/g_fds_init.S @@ -19,11 +19,11 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.h" - .init.start 300,_init_g_fds + .init.start 302,_init_g_fds push %rdi push %rsi call InitializeFileDescriptors pop %rsi pop %rdi - .init.end 300,_init_g_fds + .init.end 302,_init_g_fds .source __FILE__ diff --git a/libc/calls/getenv.c b/libc/calls/getenv.c index 19932159..1347c239 100644 --- a/libc/calls/getenv.c +++ b/libc/calls/getenv.c @@ -25,16 +25,22 @@ /** 
* Returns value of environment variable, or NULL if not found. */ -char *getenv(const char *name) { - char **ep; - size_t i, namelen; - char *empty[1] = {0}; - ep = environ; - if (!ep) ep = empty; - namelen = strlen(name); - for (i = 0; ep[i]; ++i) { - if (strncmp(ep[i], name, namelen) == 0 && ep[i][namelen] == '=') { - return &ep[i][namelen + 1]; +char *getenv(const char *s) { + char **p; + size_t i, j; + if ((p = environ)) { + for (i = 0; p[i]; ++i) { + for (j = 0;; ++j) { + if (!s[j]) { + if (p[i][j] == '=') { + return &p[i][j + 1]; + } + break; + } + if (s[j] != p[i][j]) { + break; + } + } } } return NULL; diff --git a/libc/calls/hefty/execle.c b/libc/calls/hefty/execle.c index 71695b58..9a5cf73f 100644 --- a/libc/calls/hefty/execle.c +++ b/libc/calls/hefty/execle.c @@ -17,9 +17,9 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/mem.h" -#include "libc/calls/hefty/mkvarargv.h" #include "libc/calls/calls.h" +#include "libc/calls/hefty/mkvarargv.h" +#include "libc/mem/mem.h" /** * Executes program, with custom environment. diff --git a/libc/calls/hefty/execlp.c b/libc/calls/hefty/execlp.c index 86174a49..d2202c1b 100644 --- a/libc/calls/hefty/execlp.c +++ b/libc/calls/hefty/execlp.c @@ -17,10 +17,10 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/calls.h" +#include "libc/calls/hefty/mkvarargv.h" #include "libc/mem/mem.h" #include "libc/runtime/runtime.h" -#include "libc/calls/hefty/mkvarargv.h" -#include "libc/calls/calls.h" /** * Executes program, with PATH search and current environment. @@ -36,7 +36,8 @@ */ int execlp(const char *prog, const char *arg, ... 
/*, NULL*/) { char *exe; - if ((exe = commandv(prog))) { + char pathbuf[PATH_MAX]; + if ((exe = commandv(prog, pathbuf))) { va_list va; void *argv; va_start(va, arg); diff --git a/libc/calls/hefty/execvpe.c b/libc/calls/hefty/execvpe.c index 6ae780d4..df8676cb 100644 --- a/libc/calls/hefty/execvpe.c +++ b/libc/calls/hefty/execvpe.c @@ -17,8 +17,8 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/mem.h" #include "libc/calls/calls.h" +#include "libc/mem/mem.h" /** * Executes program, with path environment search. @@ -33,7 +33,8 @@ */ int execvpe(const char *prog, char *const argv[], char *const *envp) { char *exe; - if ((exe = commandv(prog))) { + char pathbuf[PATH_MAX]; + if ((exe = commandv(prog, pathbuf))) { execve(exe, argv, envp); } return -1; diff --git a/libc/calls/hefty/sortenvp.c b/libc/calls/hefty/sortenvp.c index e20acc8b..267fc3fc 100644 --- a/libc/calls/hefty/sortenvp.c +++ b/libc/calls/hefty/sortenvp.c @@ -24,17 +24,18 @@ #include "libc/nexgen32e/tinystrcmp.h" #include "libc/str/str.h" -static int sortenvpcb(const char **a, const char **b) { return strcmp(*a, *b); } +static int CompareStrings(const char *l, const char *r) { + size_t i = 0; + while (l[i] == r[i] && r[i]) ++i; + return (l[i] & 0xff) - (r[i] & 0xff); +} -static void slowsort(char **a, int n) { +static void SortStrings(char **a, size_t n) { + char *t; size_t i, j; - const char *t; for (i = 1; i < n; ++i) { - j = i; - t = a[i]; - while (j > 0 && tinystrcmp(t, a[j - 1]) < 0) { + for (t = a[i], j = i; j > 0 && CompareStrings(t, a[j - 1]) < 0; --j) { a[j] = a[j - 1]; - --j; } a[j] = t; } @@ -52,17 +53,14 @@ static void slowsort(char **a, int n) { * @return newly allocated sorted copy of envp pointer array */ hidden textwindows nodiscard char **sortenvp(char *const envp[]) { - size_t count = 0; - while (envp[count]) count++; - size_t bytesize = (count + 1) * 
sizeof(char *); - char **copy = malloc(bytesize); - if (copy) { - memcpy(copy, envp, bytesize); - if (IsTiny()) { - slowsort(copy, count); - } else { - qsort(copy, count, sizeof(char *), (void *)sortenvpcb); - } + char **copy; + size_t n, size; + n = 0; + while (envp[n]) n++; + size = (n + 1) * sizeof(char *); + if ((copy = malloc(size))) { + memcpy(copy, envp, size); + SortStrings(copy, n); } return copy; } diff --git a/libc/calls/hefty/spawnlp.c b/libc/calls/hefty/spawnlp.c index d6f58f6a..c7112cef 100644 --- a/libc/calls/hefty/spawnlp.c +++ b/libc/calls/hefty/spawnlp.c @@ -40,8 +40,9 @@ nodiscard int spawnlp(unsigned flags, int stdiofds[3], const char *prog, char *exe; va_list va; void *argv; + char pathbuf[PATH_MAX]; pid = -1; - if ((exe = commandv(prog))) { + if ((exe = commandv(prog, pathbuf))) { va_start(va, arg); if ((argv = mkvarargv(arg, va))) { pid = spawnve(flags, stdiofds, exe, argv, environ); diff --git a/libc/runtime/kntsystemdirectory.S b/libc/calls/kntsystemdirectory.S similarity index 100% rename from libc/runtime/kntsystemdirectory.S rename to libc/calls/kntsystemdirectory.S diff --git a/libc/runtime/kntwindowsdirectory.S b/libc/calls/kntwindowsdirectory.S similarity index 100% rename from libc/runtime/kntwindowsdirectory.S rename to libc/calls/kntwindowsdirectory.S diff --git a/libc/calls/hefty/ntaccesscheck.c b/libc/calls/ntaccesscheck.c similarity index 82% rename from libc/calls/hefty/ntaccesscheck.c rename to libc/calls/ntaccesscheck.c index f517feb0..4e56072d 100644 --- a/libc/calls/hefty/ntaccesscheck.c +++ b/libc/calls/ntaccesscheck.c @@ -17,7 +17,8 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/mem/mem.h" +#include "libc/calls/calls.h" +#include "libc/calls/internal.h" #include "libc/nt/enum/accessmask.h" #include "libc/nt/enum/securityinformation.h" #include "libc/nt/errors.h" @@ -28,33 +29,33 @@ 
#include "libc/nt/struct/securitydescriptor.h" #include "libc/runtime/runtime.h" #include "libc/str/str.h" -#include "libc/calls/internal.h" -#include "libc/calls/calls.h" #include "libc/sysv/consts/ok.h" /** - * Checks if current process has access to folder or file. + * Asks Microsoft if we're authorized to use a folder or file. + * + * Implementation Details: MSDN documentation imposes no limit on the + * internal size of SECURITY_DESCRIPTOR, which we are responsible for + * allocating. We've selected 1024 which shall hopefully be adequate. * * @param flags can have R_OK, W_OK, X_OK, etc. * @return 0 if authorized, or -1 w/ errno - * @kudos Aaron Ballman for teaching how to do this + * @kudos Aaron Ballman for teaching this * @see libc/sysv/consts.sh */ textwindows int ntaccesscheck(const char16_t *pathname, uint32_t flags) { int rc; bool32 result; + struct NtGenericMapping mapping; + struct NtPrivilegeSet privileges; int64_t hToken, hImpersonatedToken; uint32_t secsize, granted, privsize; - struct NtPrivilegeSet privileges; - struct NtGenericMapping mapping; - struct NtSecurityDescriptor security; - struct NtSecurityDescriptor *psecurity; - const uint32_t request = kNtOwnerSecurityInformation | - kNtGroupSecurityInformation | - kNtDaclSecurityInformation; + union NtSecurityDescriptorLol { + struct NtSecurityDescriptor s; + char b[1024]; + } security; granted = 0; result = false; - psecurity = &security; secsize = sizeof(security); privsize = sizeof(privileges); memset(&privileges, 0, sizeof(privileges)); @@ -64,23 +65,23 @@ textwindows int ntaccesscheck(const char16_t *pathname, uint32_t flags) { mapping.GenericAll = kNtFileAllAccess; MapGenericMask(&flags, &mapping); hImpersonatedToken = hToken = -1; - if ((GetFileSecurity(pathname, request, psecurity, 0, &secsize) || - (GetLastError() == kNtErrorInsufficientBuffer && - (psecurity = malloc(secsize)) && - GetFileSecurity(pathname, request, psecurity, secsize, &secsize))) && + if (GetFileSecurity(pathname, + 
kNtOwnerSecurityInformation | + kNtGroupSecurityInformation | + kNtDaclSecurityInformation, + &security.s, 0, &secsize) && OpenProcessToken(GetCurrentProcess(), kNtTokenImpersonate | kNtTokenQuery | kNtTokenDuplicate | kNtStandardRightsRead, &hToken) && DuplicateToken(hToken, kNtSecurityImpersonation, &hImpersonatedToken) && - AccessCheck(psecurity, hImpersonatedToken, flags, &mapping, &privileges, + AccessCheck(&security.s, hImpersonatedToken, flags, &mapping, &privileges, &privsize, &granted, &result) && (result || flags == F_OK)) { rc = 0; } else { rc = winerr(); } - free_s(&psecurity); close(hImpersonatedToken); close(hToken); return rc; diff --git a/libc/calls/vdprintf.c b/libc/calls/vdprintf.c index ff6dfe71..c3ec4de1 100644 --- a/libc/calls/vdprintf.c +++ b/libc/calls/vdprintf.c @@ -57,10 +57,7 @@ int(vdprintf)(int fd, const char *fmt, va_list va) { struct VdprintfState df; df.n = 0; df.fd = fd; - if (palandprintf(vdprintfputchar, &df, fmt, va) != -1 || - vdprintf_flush(&df, df.n & (ARRAYLEN(df.buf) - 1)) != -1) { - return df.n; - } else { - return -1; - } + if (palandprintf(vdprintfputchar, &df, fmt, va) == -1) return -1; + if (vdprintf_flush(&df, df.n & (ARRAYLEN(df.buf) - 1)) == -1) return -1; + return df.n; } diff --git a/libc/conv/sizemultiply.h b/libc/conv/sizemultiply.h deleted file mode 100644 index 9323d6fe..00000000 --- a/libc/conv/sizemultiply.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_CONV_SIZEMULTIPLY_H_ -#define COSMOPOLITAN_LIBC_CONV_SIZEMULTIPLY_H_ -#include "libc/limits.h" -#if !(__ASSEMBLER__ + __LINKER__ + 0) - -/** - * Multiplies memory sizes. 
- * - * @param count may be 0 to for realloc() → free() behavior - * @param opt_out set to count*itemsize or SIZE_MAX on overflow - * @return true on success or false on overflow - */ -forceinline bool sizemultiply(size_t *opt_out, size_t count, size_t itemsize) { - size_t res = 0; - bool overflowed = false; - if (count != 0) { - res = count * itemsize; - if (((count | itemsize) & ~0xfffful) && (res / count != itemsize)) { - overflowed = true; - res = SIZE_MAX; - } - } - if (opt_out) *opt_out = res; - return !overflowed; -} - -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_LIBC_CONV_SIZEMULTIPLY_H_ */ diff --git a/libc/crypto/unrijndaelinit.c b/libc/crypto/unrijndaelinit.c index 08c55038..c55f5545 100644 --- a/libc/crypto/unrijndaelinit.c +++ b/libc/crypto/unrijndaelinit.c @@ -33,7 +33,7 @@ static void unrijndaelinit$westmere(struct Rijndael *ctx, uint32_t n, x = ctx->rk[i].xmm; asm("aesimc\t%1,%0" : "=x"(x) : "0"(x)); ctx->rk[i].xmm = x; - } while (i++ < n); + } while (++i < n); XMM_DESTROY(x); } @@ -49,7 +49,7 @@ static relegated noinline void unrijndaelinit$pure(struct Rijndael *ctx, x = ctx->rk[i].xmm; x = InvMixColumns(x); ctx->rk[i].xmm = x; - } while (i++ < n); + } while (++i < n); XMM_DESTROY(x); } diff --git a/libc/dns/parsehoststxt.c b/libc/dns/parsehoststxt.c index 40df14de..e5e68a3d 100644 --- a/libc/dns/parsehoststxt.c +++ b/libc/dns/parsehoststxt.c @@ -45,10 +45,11 @@ */ int parsehoststxt(struct HostsTxt *ht, FILE *f) { int rc = 0; - char stackline[128]; - char *line = stackline; - size_t linecap = sizeof(stackline); - while ((getline(&line, &linecap, f)) != -1) { + char *line; + size_t linesize; + line = NULL; + linesize = 0; + while ((getline(&line, &linesize, f)) != -1) { struct HostsTxtEntry entry; char *addr, *name, *tok, *comment; if ((comment = strchr(line, '#'))) *comment = '\0'; @@ -64,6 +65,6 @@ int parsehoststxt(struct HostsTxt *ht, FILE *f) { } } } - free_s(&line); + free(line); return rc | ferror(f); } diff --git 
a/libc/dns/parseresolvconf.c b/libc/dns/parseresolvconf.c index 6620b581..c9fb116a 100644 --- a/libc/dns/parseresolvconf.c +++ b/libc/dns/parseresolvconf.c @@ -20,6 +20,7 @@ #include "libc/alg/arraylist.h" #include "libc/dns/dns.h" #include "libc/dns/resolvconf.h" +#include "libc/mem/mem.h" #include "libc/runtime/runtime.h" #include "libc/sock/sock.h" #include "libc/stdio/stdio.h" @@ -44,13 +45,14 @@ int parseresolvconf(struct ResolvConf *resolv, struct FILE *f) { /* TODO(jart): options ndots:5 */ int rc = 0; - char stackline[32]; - char *line = stackline; - size_t linecap = sizeof(stackline); + char *line; + size_t linesize; struct sockaddr_in nameserver; + line = NULL; + linesize = 0; nameserver.sin_family = AF_INET; nameserver.sin_port = htons(DNS_PORT); - while (getline(&line, &linecap, f) != -1) { + while (getline(&line, &linesize, f) != -1) { char *directive, *value, *tok, *comment; if ((comment = strchr(line, '#'))) *comment = '\0'; if ((directive = strtok_r(line, " \t\r\n\v", &tok)) && @@ -61,6 +63,6 @@ int parseresolvconf(struct ResolvConf *resolv, struct FILE *f) { } } } - free_s(&line); + free(line); return rc | ferror(f); } diff --git a/libc/fmt/unbing.c b/libc/fmt/unbing.c index 319628a4..5c54df91 100644 --- a/libc/fmt/unbing.c +++ b/libc/fmt/unbing.c @@ -56,14 +56,7 @@ static int g_cp437i[256 + ARRAYLEN(kCp437iMultimappings)]; * @see bing() */ int unbing(int c) { - int i, m, l, r; - static bool once; - if (!once) { - for (i = 0; i < 256; ++i) g_cp437i[i] = kCp437[i] << 8 | i; - memcpy(g_cp437i + 256, kCp437iMultimappings, sizeof(kCp437iMultimappings)); - insertionsort(ARRAYLEN(g_cp437i), g_cp437i); - once = true; - } + int m, l, r; l = 0; r = ARRAYLEN(g_cp437i) - 1; while (l <= r) { @@ -78,3 +71,12 @@ int unbing(int c) { } return -1; } + +static textstartup void g_cp437i_init() { + unsigned i; + for (i = 0; i < 256; ++i) g_cp437i[i] = kCp437[i] << 8 | i; + memcpy(g_cp437i + 256, kCp437iMultimappings, sizeof(kCp437iMultimappings)); + 
djbsort(ARRAYLEN(g_cp437i), g_cp437i); +} + +const void *const g_cp437i_ctor[] initarray = {g_cp437i_init}; diff --git a/libc/integral/c.inc b/libc/integral/c.inc index 3101b9ce..3955c208 100644 --- a/libc/integral/c.inc +++ b/libc/integral/c.inc @@ -336,6 +336,7 @@ typedef uint64_t uintmax_t; * 4. unprofilable * 5. unhookable * + * @note consider static or writing a macro * @see externinline */ #ifndef forceinline diff --git a/libc/integral/normalize.inc b/libc/integral/normalize.inc index 587d6008..29802cf2 100644 --- a/libc/integral/normalize.inc +++ b/libc/integral/normalize.inc @@ -66,8 +66,8 @@ #endif #define BIGPAGESIZE 0x200000 +#define STACKSIZE 0x20000 #define FRAMESIZE 0x10000 /* 8086 */ -#define STACKSIZE 0x10000 /* goog */ #define PAGESIZE 0x1000 /* i386+ */ #define BUFSIZ 0x1000 /* best stdio default */ #define CACHELINE 0x40 /* nexgen32e */ diff --git a/libc/intrin/intrin.mk b/libc/intrin/intrin.mk index 06594bb6..98f5dfe8 100644 --- a/libc/intrin/intrin.mk +++ b/libc/intrin/intrin.mk @@ -13,8 +13,7 @@ LIBC_INTRIN_A_SRCS = $(LIBC_INTRIN_A_SRCS_S) $(LIBC_INTRIN_A_SRCS_C) LIBC_INTRIN_A_CHECKS = $(LIBC_INTRIN_A).pkg LIBC_INTRIN_A_FILES := \ - $(wildcard libc/intrin/*) \ - $(wildcard libc/intrin/delegates/*) + $(wildcard libc/intrin/*) LIBC_INTRIN_A_OBJS = \ $(LIBC_INTRIN_A_SRCS:%=o/$(MODE)/%.zip.o) \ diff --git a/libc/intrin/repmovsb.h b/libc/intrin/repmovsb.h index bebff72e..bb15832e 100644 --- a/libc/intrin/repmovsb.h +++ b/libc/intrin/repmovsb.h @@ -2,7 +2,7 @@ #define COSMOPOLITAN_LIBC_INTRIN_REPMOVSB_H_ #if !(__ASSEMBLER__ + __LINKER__ + 0) -static void repmovsb(void **dest, const void **src, size_t cx) { +forceinline void repmovsb(void **dest, const void **src, size_t cx) { char *di = (char *)*dest; const char *si = (const char *)*src; while (cx) *di++ = *si++, cx--; diff --git a/libc/intrin/repstosb.h b/libc/intrin/repstosb.h index 4536f7f8..e733a6f0 100644 --- a/libc/intrin/repstosb.h +++ b/libc/intrin/repstosb.h @@ -2,7 +2,7 @@ #define 
COSMOPOLITAN_LIBC_INTRIN_REPSTOSB_H_ #if !(__ASSEMBLER__ + __LINKER__ + 0) -static void *repstosb(void *dest, unsigned char al, size_t cx) { +forceinline void *repstosb(void *dest, unsigned char al, size_t cx) { unsigned char *di = (unsigned char *)dest; while (cx) *di++ = al, cx--; return di; diff --git a/libc/log/asan.c b/libc/log/asan.c new file mode 100644 index 00000000..0c7f54b1 --- /dev/null +++ b/libc/log/asan.c @@ -0,0 +1,370 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. 
│ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" +#include "libc/bits/safemacros.h" +#include "libc/bits/weaken.h" +#include "libc/calls/calls.h" +#include "libc/conv/conv.h" +#include "libc/conv/itoa.h" +#include "libc/log/asan.h" +#include "libc/log/backtrace.h" +#include "libc/log/log.h" +#include "libc/macros.h" +#include "libc/mem/hook/hook.h" +#include "libc/runtime/directmap.h" +#include "libc/runtime/internal.h" +#include "libc/runtime/memtrack.h" +#include "libc/runtime/missioncritical.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/symbols.h" +#include "libc/stdio/stdio.h" +#include "libc/str/str.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/prot.h" +#include "third_party/dlmalloc/dlmalloc.h" + +/** + * @fileoverview Cosmopolitan Address Sanitizer Runtime. + * + * Someone brilliant at Google figured out a way to improve upon memory + * protection. Rather than invent another Java or Rust they changed GCC + * so it can emit fast code, that checks the validity of each memory op + * with byte granularity, by probing shadow memory. + * + * AddressSanitizer dedicates one-eighth of the virtual address space + * to its shadow memory and uses a direct mapping with a scale and + * offset to translate an application address to its corresponding + * shadow address. Given the application memory address Addr, the + * address of the shadow byte is computed as (Addr>>3)+Offset." 
+ * + * We use the following encoding for each shadow byte: 0 means that + * all 8 bytes of the corresponding application memory region are + * addressable; k (1 ≤ k ≤ 7) means that the first k bytes are + * addressable; any negative value indicates that the entire 8-byte + * word is unaddressable. We use different negative values to + * distinguish between different kinds of unaddressable memory (heap + * redzones, stack redzones, global redzones, freed memory). + * + * Here's what the generated code looks like for 64-bit reads: + * + * movq %addr,%tmp + * shrq $3,%tmp + * cmpb $0,0x7fff8000(%tmp) + * jnz abort + * movq (%addr),%dst + */ + +/* Stores IMPL through weaken(HOOK) when that pointer is non-null; the + do/while(0) wrapper makes the expansion behave as a single statement. */ +#define HOOK(HOOK, IMPL) \ + do { \ + if (weaken(HOOK)) { \ + *weaken(HOOK) = IMPL; \ + } \ + } while (0) + +struct AsanSourceLocation { + const char *filename; + int line; + int column; +}; + +struct AsanAccessInfo { + const char *addr; + const char *first_bad_addr; + size_t size; + bool iswrite; + unsigned long ip; +}; + +struct AsanGlobal { + const char *addr; + size_t size; + size_t size_with_redzone; + const void *name; + const void *module_name; + unsigned long has_cxx_init; + struct AsanSourceLocation *location; + char *odr_indicator; +}; + +/* Tells whether the 64kb frame holding p lies inside an interval tracked by _mmi. */ +static bool __asan_is_mapped(void *p) { + int x, i; + x = (intptr_t)p >> 16; + i = FindMemoryInterval(&_mmi, x); + return i < _mmi.i && x >= _mmi.p[i].x && x <= _mmi.p[i].y; +} + +/** + * Ensures the shadow memory backing [addr,addr+size) is mapped. + * + * Walks the shadow range one FRAMESIZE frame at a time, DirectMap()s each + * frame that _mmi does not already track, records it with + * TrackMemoryInterval(), and abort()s if either step fails. + */ +void __asan_map_shadow(void *addr, size_t size) { + char *a, *b; + struct DirectMap sm; + a = (char *)ROUNDDOWN(SHADOW((intptr_t)addr), FRAMESIZE); + b = (char *)ROUNDDOWN(SHADOW((intptr_t)addr + size - 1), FRAMESIZE); + for (; a <= b; a += FRAMESIZE) { + if (!__asan_is_mapped(a)) { + sm = DirectMap(a, FRAMESIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (sm.addr == MAP_FAILED || + TrackMemoryInterval(&_mmi, (intptr_t)a >> 16, (intptr_t)a >> 16, + sm.maphandle) == -1) { + abort(); + } + } + } +} + +size_t __asan_malloc_usable_size(const void *vp) { + char *s; + size_t n; + for (n
= 0, s = (char *)SHADOW((intptr_t)vp);; ++s) { + if (!*s) { + n += 8; + } else if (*s > 0) { + n += *s & 7; + } else { + break; + } + } + return n; +} + +/** + * Allocates size bytes via dlmemalign() and writes the matching shadow: + * two shadow bytes tagged underrun for the 16 bytes below the block, + * zeroes (plus a partial-granule count when size % 8 != 0) for the + * payload, then two shadow bytes tagged overrun. + * + * @return block pointer, or NULL if dlmemalign() fails or if the padded + * request size would overflow size_t + */ +void *__asan_allocate(size_t align, size_t size, int underrun, int overrun) { + char *p, *s; + size_t q, r; + /* ROUNDUP(size, 8) + 16 adds up to 23 bytes; if that wrapped we would get + a tiny block yet still memset size/8 shadow bytes below */ + if (size > (size_t)-1 - 23) return NULL; + if (!(p = dlmemalign(align, ROUNDUP(size, 8) + 16))) return NULL; + s = (char *)SHADOW((intptr_t)p - 16); + q = size / 8; + r = size % 8; + *s++ = underrun; + *s++ = underrun; + memset(s, 0, q); + s += q; + if (r) *s++ = r; + *s++ = overrun; + *s++ = overrun; + return p; +} + +void __asan_deallocate(char *p, int kind) { + char *s; + size_t n; + s = (char *)SHADOW((intptr_t)p); + n = dlmalloc_usable_size(p); + n /= 8; + memset(s, kind, n); + dlfree(p); +} + +void __asan_free(void *vp) { + __asan_deallocate(vp, kAsanHeapFree); +} + +void *__asan_memalign(size_t align, size_t size) { + return __asan_allocate(align, size, kAsanHeapUnderrun, kAsanHeapOverrun); +} + +void *__asan_malloc(size_t size) { + return __asan_memalign(16, size); +} + +void *__asan_calloc(size_t n, size_t m) { + char *p; + size_t size; + if (__builtin_mul_overflow(n, m, &size)) size = -1; /* too big: __asan_allocate() rejects it */ + if ((p = __asan_malloc(size))) memset(p, 0, size); + return p; +} + +void *__asan_realloc(void *p, size_t n) { + char *p2; + if (p) { + if (n) { + if ((p2 = __asan_malloc(n))) { + memcpy(p2, p, min(n, dlmalloc_usable_size(p))); + __asan_deallocate(p, kAsanRelocated); + } + } else { + __asan_free(p); + p2 = NULL; + } + } else { + p2 = __asan_malloc(n); + } + return p2; +} + +void *__asan_valloc(size_t n) { + return __asan_memalign(PAGESIZE, n); +} + +void *__asan_pvalloc(size_t n) { + return __asan_valloc(ROUNDUP(n, PAGESIZE)); +} + +void __asan_poison(intptr_t addr, size_t size, size_t redsize, int kind) { + char *s; + intptr_t p; + size_t a, b, w; + w = (intptr_t)addr & 7; + p = (intptr_t)addr - w; + a = w + size; + b = w + redsize; + s = (char *)SHADOW(p + a); + if (a & 7) *s++ = a & 7; + memset(s, kind, (b - ROUNDUP(a, 8)) >> 3); +} + +void
__asan_register_globals(struct AsanGlobal g[], int n) { + size_t i; + for (i = 0; i < n; ++i) { + __asan_poison((intptr_t)g[i].addr, g[i].size, g[i].size_with_redzone, + kAsanGlobalOverrun); + } +} + +void __asan_report_memory_fault(uint8_t *addr, int size, const char *kind) { + char *p, *s, ibuf[21], buf[256]; + switch (*(char *)SHADOW((intptr_t)addr)) { + case kAsanStackFree: + s = "stack use after release"; + break; + case kAsanHeapFree: + s = "heap use after free"; + break; + case kAsanRelocated: + s = "heap use after relocate"; + break; + case kAsanHeapUnderrun: + s = "heap underrun"; + break; + case kAsanHeapOverrun: + s = "heap overrun"; + break; + case kAsanStackUnderrun: + s = "stack underflow"; + break; + case kAsanStackOverrun: + s = "stack overflow"; + break; + case kAsanAllocaOverrun: + s = "alloca overflow"; + break; + case kAsanUnscoped: + s = "unscoped"; + break; + default: + s = "poisoned"; + break; + } + p = buf; + p = stpcpy(p, "error: "); + p = stpcpy(p, s); + p = stpcpy(p, " "); + uint64toarray_radix10(size, ibuf); + p = stpcpy(p, ibuf); + p = stpcpy(p, "-byte "); + p = stpcpy(p, kind); + p = stpcpy(p, " at 0x"); + uint64toarray_fixed16((intptr_t)addr, ibuf, 48); + p = stpcpy(p, ibuf); + p = stpcpy(p, "\n"); + __print(buf, p - buf); + PrintBacktraceUsingSymbols(stderr, __builtin_frame_address(0), + getsymboltable()); + DebugBreak(); + _Exit(66); +} + +void *__asan_stack_malloc(size_t size, int classid) { + return __asan_allocate(32, size, kAsanStackUnderrun, kAsanStackOverrun); +} + +void __asan_stack_free(char *p, size_t size, int classid) { + return __asan_deallocate(p, kAsanStackFree); +} + +void __asan_report_load_n(uint8_t *addr, int size) { + __asan_report_memory_fault(addr, size, "load"); +} + +void __asan_report_store_n(uint8_t *addr, int size) { + __asan_report_memory_fault(addr, size, "store"); +} + +void __asan_poison_stack_memory(uintptr_t p, size_t n) { + memset((char *)SHADOW(p), kAsanUnscoped, n >> 3); + if (n & 7) *(char 
*)SHADOW(p + n) = 8 - (n & 7); +} + +void __asan_unpoison_stack_memory(uintptr_t p, size_t n) { + memset((char *)SHADOW(p), 0, n >> 3); + if (n & 7) *(char *)SHADOW(p + n) = n & 7; +} + +void __asan_loadN(intptr_t ptr, size_t size) { + DebugBreak(); +} + +void __asan_storeN(intptr_t ptr, size_t size) { + DebugBreak(); +} + +void __asan_alloca_poison(intptr_t addr, size_t size) { + __asan_poison(addr, size, size + 32, kAsanAllocaOverrun); +} + +void __asan_allocas_unpoison(uintptr_t top, uintptr_t bottom) { + memset((char *)SHADOW(top), 0, (bottom - top) >> 3); +} + +void *__asan_addr_is_in_fake_stack(void *fakestack, void *addr, void **beg, + void **end) { + return NULL; +} + +void *__asan_get_current_fake_stack(void) { + return NULL; +} + +void __asan_install_malloc_hooks(void) { + HOOK(hook$free, __asan_free); + HOOK(hook$malloc, __asan_malloc); + HOOK(hook$calloc, __asan_calloc); + HOOK(hook$valloc, __asan_valloc); + HOOK(hook$pvalloc, __asan_pvalloc); + HOOK(hook$realloc, __asan_realloc); + HOOK(hook$memalign, __asan_memalign); + HOOK(hook$malloc_usable_size, __asan_malloc_usable_size); +} + +void __asan_init(int argc, char *argv[], char **envp, intptr_t *auxv) { + int i; + static bool once; + register intptr_t rsp asm("rsp"); + if (!once) { + __asan_map_shadow(_base, _end - _base); + __asan_map_shadow((void *)ROUNDDOWN(rsp, STACKSIZE), STACKSIZE); + for (i = 0; i < argc; ++i) __asan_map_shadow(argv[i], strlen(argv[i])); + for (; *envp; ++envp) __asan_map_shadow(*envp, strlen(*envp)); + __asan_map_shadow(auxv, sizeof(intptr_t) * 2); + __asan_install_malloc_hooks(); + once = true; + } +} + +const void *const g_asan_ctor[] initarray = {getsymboltable}; diff --git a/libc/log/asan.h b/libc/log/asan.h new file mode 100644 index 00000000..ca15d460 --- /dev/null +++ b/libc/log/asan.h @@ -0,0 +1,21 @@ +#ifndef COSMOPOLITAN_LIBC_LOG_ASAN_H_ +#define COSMOPOLITAN_LIBC_LOG_ASAN_H_ + +#define kAsanScale 3 +#define kAsanMagic 0x7fff8000 +#define kAsanHeapFree -1 +#define 
kAsanStackFree -2 +#define kAsanRelocated -3 +#define kAsanHeapUnderrun -4 +#define kAsanHeapOverrun -5 +#define kAsanGlobalOverrun -6 +#define kAsanStackUnderrun -7 +#define kAsanStackOverrun -8 +#define kAsanAllocaOverrun -9 +#define kAsanUnscoped -10 + +#define SHADOW(x) (((x) >> kAsanScale) + kAsanMagic) + +void __asan_map_shadow(void *, size_t); + +#endif /* COSMOPOLITAN_LIBC_LOG_ASAN_H_ */ diff --git a/libc/log/attachdebugger.c b/libc/log/attachdebugger.c index 16b9962c..72ce8996 100644 --- a/libc/log/attachdebugger.c +++ b/libc/log/attachdebugger.c @@ -23,6 +23,7 @@ #include "libc/fmt/fmt.h" #include "libc/log/gdb.h" #include "libc/log/log.h" +#include "libc/nexgen32e/stackframe.h" #include "libc/nexgen32e/vendor.h" #include "libc/paths.h" #include "libc/runtime/runtime.h" diff --git a/libc/log/backtrace.c b/libc/log/backtrace.c index f5e3f702..459c8813 100644 --- a/libc/log/backtrace.c +++ b/libc/log/backtrace.c @@ -17,8 +17,7 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/dce.h" -#include "libc/log/log.h" +#include "libc/log/backtrace.h" void backtrace(FILE *f) { showbacktrace(f, __builtin_frame_address(0)); diff --git a/libc/log/backtrace.h b/libc/log/backtrace.h new file mode 100644 index 00000000..ce31ae14 --- /dev/null +++ b/libc/log/backtrace.h @@ -0,0 +1,15 @@ +#ifndef COSMOPOLITAN_LIBC_LOG_BACKTRACE_H_ +#define COSMOPOLITAN_LIBC_LOG_BACKTRACE_H_ +#include "libc/nexgen32e/stackframe.h" +#include "libc/runtime/symbols.h" +#include "libc/stdio/stdio.h" +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +void showbacktrace(FILE *, const struct StackFrame *); +int PrintBacktraceUsingSymbols(FILE *, const struct StackFrame *, + struct SymbolTable *); + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_LIBC_LOG_BACKTRACE_H_ */ diff --git a/libc/log/backtrace2.c 
b/libc/log/backtrace2.c index a08a6df3..163cc43c 100644 --- a/libc/log/backtrace2.c +++ b/libc/log/backtrace2.c @@ -26,6 +26,7 @@ #include "libc/conv/conv.h" #include "libc/dce.h" #include "libc/fmt/fmt.h" +#include "libc/log/backtrace.h" #include "libc/log/log.h" #include "libc/nexgen32e/gc.h" #include "libc/runtime/runtime.h" @@ -37,54 +38,7 @@ #define kBacktraceMaxFrames 128 #define kBacktraceBufSize ((kBacktraceMaxFrames - 1) * (16 + 1)) -static char *FormatAddress(FILE *f, const struct SymbolTable *st, intptr_t addr, - char *out, unsigned size, bool symbolic) { - int64_t addend; - const char *name; - const struct Symbol *symbol; - if (st->count && ((intptr_t)addr >= (intptr_t)&_base && - (intptr_t)addr <= (intptr_t)&_end && symbolic)) { - symbol = &st->symbols[bisectcarleft((const int32_t(*)[2])st->symbols, - st->count, addr - st->addr_base - 1)]; - addend = addr - st->addr_base - symbol->addr_rva; - name = &st->name_base[symbol->name_rva]; - snprintf(out, size, "%s%c%#x", name, addend >= 0 ? '+' : '-', abs(addend)); - } else { - snprintf(out, size, "%p", addr); - } - return out; -} - -static int PrintBacktraceUsingSymbols(FILE *f, const struct StackFrame *bp, - char buf[hasatleast kBacktraceBufSize]) { - size_t gi; - intptr_t addr; - struct Garbages *garbage; - struct SymbolTable *symbols; - const struct StackFrame *frame; - if ((symbols = getsymboltable())) { - garbage = weaken(g_garbage); - gi = garbage ? 
garbage->i : 0; - for (frame = bp; frame; frame = frame->next) { - addr = frame->addr; - if (addr == weakaddr("CollectGarbage")) { - do { - --gi; - } while ((addr = garbage->p[gi].ret) == weakaddr("CollectGarbage")); - } - fprintf(f, "%p %p %s\n", frame, addr, - FormatAddress(f, symbols, addr, buf, kBacktraceBufSize, true)); - } - return 0; - } else { - return -1; - } -} - -static int PrintBacktraceUsingAddr2line( - FILE *f, const struct StackFrame *bp, - char buf[hasatleast kBacktraceBufSize], - char *argv[hasatleast kBacktraceMaxFrames]) { +static int PrintBacktraceUsingAddr2line(FILE *f, const struct StackFrame *bp) { ssize_t got; intptr_t addr; size_t i, j, gi; @@ -92,6 +46,7 @@ static int PrintBacktraceUsingAddr2line( struct Garbages *garbage; const struct StackFrame *frame; const char *debugbin, *p1, *p2, *p3, *addr2line; + char buf[kBacktraceBufSize], *argv[kBacktraceMaxFrames]; if (!(debugbin = finddebugbinary()) || !(addr2line = GetAddr2linePath())) { return -1; } @@ -148,24 +103,20 @@ static int PrintBacktraceUsingAddr2line( return 0; } -static noinline int PrintBacktrace(FILE *f, const struct StackFrame *bp, - char *argv[hasatleast kBacktraceMaxFrames], - char buf[hasatleast kBacktraceBufSize]) { +static int PrintBacktrace(FILE *f, const struct StackFrame *bp) { if (!IsTiny()) { - if (PrintBacktraceUsingAddr2line(f, bp, buf, argv) != -1) { + if (PrintBacktraceUsingAddr2line(f, bp) != -1) { return 0; } } - return PrintBacktraceUsingSymbols(f, bp, buf); + return PrintBacktraceUsingSymbols(f, bp, getsymboltable()); } void showbacktrace(FILE *f, const struct StackFrame *bp) { static bool noreentry; - char *argv[kBacktraceMaxFrames]; - char buf[kBacktraceBufSize]; if (!noreentry) { noreentry = true; - PrintBacktrace(f, bp, argv, buf); + PrintBacktrace(f, bp); noreentry = 0; } } diff --git a/libc/runtime/asan.greg.c b/libc/log/backtrace3.c similarity index 54% rename from libc/runtime/asan.greg.c rename to libc/log/backtrace3.c index 7f070744..03f09ecf 100644 
--- a/libc/runtime/asan.greg.c +++ b/libc/log/backtrace3.c @@ -17,83 +17,53 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/calls/calls.h" +#include "libc/alg/bisectcarleft.h" +#include "libc/bits/weaken.h" +#include "libc/fmt/fmt.h" +#include "libc/log/backtrace.h" +#include "libc/macros.h" +#include "libc/nexgen32e/gc.h" +#include "libc/nexgen32e/stackframe.h" +#include "libc/runtime/symbols.h" #include "libc/stdio/stdio.h" -#include "libc/str/str.h" -#include "libc/sysv/consts/fileno.h" -struct SourceLocation { - const char *filename; - int line; - int column; -}; - -struct AccessInfo { - const uint8_t *addr; - const uint8_t *first_bad_addr; - size_t size; - bool iswrite; - unsigned long ip; -}; - -struct Global { - const uint8_t *addr; - size_t size; - size_t size_with_redzone; - const void *name; - const void *module_name; - unsigned long has_cxx_init; - struct kasan_source_location *location; - char *odr_indicator; -}; - -privileged void __asan_init(void) { +static char *FormatAddress(FILE *f, const struct SymbolTable *st, intptr_t addr, + char *out, unsigned size, bool symbolic) { + int64_t addend; + const char *name; + const struct Symbol *symbol; + if (st->count && ((intptr_t)addr >= (intptr_t)&_base && + (intptr_t)addr <= (intptr_t)&_end && symbolic)) { + symbol = &st->symbols[bisectcarleft((const int32_t(*)[2])st->symbols, + st->count, addr - st->addr_base - 1)]; + addend = addr - st->addr_base - symbol->addr_rva; + name = &st->name_base[symbol->name_rva]; + snprintf(out, size, "%s%c%#x", name, addend >= 0 ? 
'+' : '-', ABS(addend)); + } else { + snprintf(out, size, "%p", addr); + } + return out; } -privileged void __asan_version_mismatch_check_v8(void) { -} - -privileged void __asan_register_globals(struct Global globals[], int n) { -} - -privileged void __asan_unregister_globals(struct Global globals[], int n) { -} - -privileged void __asan_report_load_n(uint8_t *p, int n) { -} - -privileged void __asan_report_store_n(uint8_t *p, int n) { - __asan_report_load_n(p, n); -} - -privileged void __asan_loadN(uintptr_t ptr, size_t size) { -} - -privileged void __asan_storeN(uintptr_t ptr, size_t size) { -} - -privileged uintptr_t __asan_stack_malloc(size_t size, int classid) { +int PrintBacktraceUsingSymbols(FILE *f, const struct StackFrame *bp, + struct SymbolTable *symbols) { + size_t gi; + char buf[256]; + intptr_t addr; + struct Garbages *garbage; + const struct StackFrame *frame; + if (!symbols) return -1; + garbage = weaken(g_garbage); + gi = garbage ? garbage->i : 0; + for (frame = bp; frame; frame = frame->next) { + addr = frame->addr; + if (addr == weakaddr("CollectGarbage")) { + do { + --gi; + } while ((addr = garbage->p[gi].ret) == weakaddr("CollectGarbage")); + } + fprintf(f, "%p %p %s\n", frame, addr, + FormatAddress(f, symbols, addr, buf, sizeof(buf), true)); + } return 0; } - -privileged void __asan_stack_free(uintptr_t ptr, size_t size, int classid) { -} - -privileged void __asan_handle_no_return(void) { - DebugBreak(); -} - -privileged void __asan_alloca_poison(uintptr_t addr, uintptr_t size) { -} - -privileged void __asan_allocas_unpoison(uintptr_t top, uintptr_t bottom) { -} - -privileged void *__asan_addr_is_in_fake_stack(void *fakestack, void *addr, - void **beg, void **end) { - return NULL; -} - -privileged void *__asan_get_current_fake_stack(void) { - return NULL; -} diff --git a/libc/log/commandvenv.c b/libc/log/commandvenv.c index 013ee4c2..4aded14e 100644 --- a/libc/log/commandvenv.c +++ b/libc/log/commandvenv.c @@ -28,13 +28,14 @@ */ nodiscard char 
*commandvenv(const char *var, const char *cmd) { const char *exepath; + char pathbuf[PATH_MAX]; if ((exepath = getenv(var))) { if (!isempty(exepath) && access(exepath, X_OK) != -1) { return exepath; } else { return NULL; } - } else if ((exepath = commandv(cmd))) { + } else if ((exepath = commandv(cmd, pathbuf))) { return exepath; } else { return NULL; diff --git a/libc/log/gdbexec.c b/libc/log/gdbexec.c index c238779c..f21da7d8 100644 --- a/libc/log/gdbexec.c +++ b/libc/log/gdbexec.c @@ -23,6 +23,7 @@ #include "libc/fmt/fmt.h" #include "libc/log/gdb.h" #include "libc/log/log.h" +#include "libc/nexgen32e/stackframe.h" #include "libc/runtime/runtime.h" #include "libc/runtime/symbols.h" diff --git a/libc/log/log.h b/libc/log/log.h index 57d70807..5b943a50 100644 --- a/libc/log/log.h +++ b/libc/log/log.h @@ -29,7 +29,6 @@ COSMOPOLITAN_C_START_ struct sigset; struct winsize; -struct StackFrame; typedef struct FILE FILE; extern FILE *g_logfile; @@ -51,7 +50,6 @@ void showcrashreports(void); void callexitontermination(struct sigset *); bool32 IsDebuggerPresent(bool); bool isrunningundermake(void); -void showbacktrace(FILE *, const struct StackFrame *); /*───────────────────────────────────────────────────────────────────────────│─╗ │ cosmopolitan § liblog » logging ─╬─│┼ diff --git a/libc/log/log.mk b/libc/log/log.mk index ecd2d416..efbb144d 100644 --- a/libc/log/log.mk +++ b/libc/log/log.mk @@ -8,7 +8,6 @@ LIBC_LOG = $(LIBC_LOG_A_DEPS) $(LIBC_LOG_A) LIBC_LOG_A = o/$(MODE)/libc/log/log.a LIBC_LOG_A_FILES := \ $(wildcard libc/log/thunks/*) \ - $(wildcard libc/log/elf/*) \ $(wildcard libc/log/*) LIBC_LOG_A_HDRS = $(filter %.h,$(LIBC_LOG_A_FILES)) LIBC_LOG_A_SRCS_C = $(filter %.c,$(LIBC_LOG_A_FILES)) @@ -38,6 +37,7 @@ LIBC_LOG_A_DIRECTDEPS = \ LIBC_TINYMATH \ LIBC_NEXGEN32E \ LIBC_NT_KERNELBASE \ + LIBC_MEM \ LIBC_RAND \ LIBC_RUNTIME \ LIBC_STDIO \ @@ -60,17 +60,10 @@ $(LIBC_LOG_A).pkg: \ $(LIBC_LOG_A_OBJS) \ $(foreach x,$(LIBC_LOG_A_DIRECTDEPS),$($(x)_A).pkg) 
-o/$(MODE)/libc/log/die.o \ -o/$(MODE)/libc/log/perror.o \ -o/$(MODE)/libc/log/ftrace.o \ -o/$(MODE)/libc/log/ubsan.o \ -o/$(MODE)/libc/log/symbols.o \ -o/$(MODE)/libc/log/backtrace.o \ -o/$(MODE)/libc/log/oncrash.o \ -o/$(MODE)/libc/log/shadowargs.o \ -o/$(MODE)/libc/log/thunks/__check_fail_ndebug.o: \ - OVERRIDE_COPTS += \ - $(NO_MAGIC) +$(LIBC_LOG_A_OBJS): \ + OVERRIDE_CFLAGS += \ + $(NO_MAGIC) \ + -fwrapv LIBC_LOG_LIBS = $(foreach x,$(LIBC_LOG_ARTIFACTS),$($(x))) LIBC_LOG_SRCS = $(foreach x,$(LIBC_LOG_ARTIFACTS),$($(x)_SRCS)) diff --git a/libc/log/oncrash.c b/libc/log/oncrash.c index 81729585..2cc4cc75 100644 --- a/libc/log/oncrash.c +++ b/libc/log/oncrash.c @@ -23,10 +23,12 @@ #include "libc/calls/ucontext.h" #include "libc/dce.h" #include "libc/fmt/fmt.h" +#include "libc/log/backtrace.h" #include "libc/log/gdb.h" #include "libc/log/internal.h" #include "libc/log/log.h" #include "libc/macros.h" +#include "libc/nexgen32e/stackframe.h" #include "libc/runtime/internal.h" #include "libc/runtime/memtrack.h" #include "libc/runtime/runtime.h" @@ -163,7 +165,6 @@ relegated static void ShowCrashReport(int err, FILE *f, int sig, } fputc('\n', f); fflush(f); - memsummary(fileno(f)); ShowMemoryMappings(fileno(f)); } diff --git a/libc/stubs/asanjmp.greg.S b/libc/log/somanyasan.S similarity index 81% rename from libc/stubs/asanjmp.greg.S rename to libc/log/somanyasan.S index cb9564fa..c4772f6f 100644 --- a/libc/stubs/asanjmp.greg.S +++ b/libc/log/somanyasan.S @@ -21,6 +21,12 @@ .privileged .source __FILE__ +/ @fileoverview Address Sanitizer Thunks +/ +/ This has tiny code size and reduces API surface area +/ since ASAN has the same stylistic hugeness as UBSAN. +/ We also guard all the functions, against reentrancy. 
+ __asan_load1: push $1 jmp OnLoad @@ -43,13 +49,11 @@ __asan_load16: .endfn __asan_load16,globl __asan_load32: push $32 -/ fallthrough +/ 𝑠𝑙𝑖𝑑𝑒 .endfn __asan_load32,globl OnLoad: pop %rsi - .globl __asan_loadN - .weak __asan_loadN ezlea __asan_loadN,ax - jmp OnAsan + jmp __asan_report_noreentry .endfn OnStore __asan_store1: @@ -74,13 +78,11 @@ __asan_store16: .endfn __asan_store16,globl __asan_store32: push $32 -/ fallthrough +/ 𝑠𝑙𝑖𝑑𝑒 .endfn __asan_store32,globl OnStore:pop %rsi - .globl __asan_storeN - .weak __asan_storeN ezlea __asan_storeN,ax - jmp OnAsan + jmp __asan_report_noreentry .endfn OnStore __asan_report_load1: @@ -101,14 +103,12 @@ __asan_report_load8: .endfn __asan_report_load8,globl __asan_report_load16: push $16 -/ fallthrough +/ 𝑠𝑙𝑖𝑑𝑒 .endfn __asan_report_load16,globl OnReportLoad: pop %rsi - .globl __asan_report_load_n - .weak __asan_report_load_n ezlea __asan_report_load_n,ax - jmp OnAsan + jmp __asan_report_noreentry .endfn OnReportLoad __asan_report_store1: @@ -133,33 +133,31 @@ __asan_report_store16: .endfn __asan_report_store16,globl __asan_report_store32: push $32 -/ fallthrough +/ 𝑠𝑙𝑖𝑑𝑒 .endfn __asan_report_store32,globl ReportStore: pop %rsi - .globl __asan_report_store_n - .weak __asan_report_store_n ezlea __asan_report_store_n,ax -/ fallthrough +/ 𝑠𝑙𝑖𝑑𝑒 .endfn ReportStore -OnAsan: test %rax,%rax - jz 1f - jmp *%rax -1: ret - .endfn OnAsan +__asan_report_noreentry: + push %rbp + mov %rsp,%rbp + cmpb $0,noreentry(%rip) + jnz 2f + incb noreentry(%rip) + call *%rax + decb noreentry(%rip) + pop %rbp + ret +2: call abort + .endfn __asan_report_noreentry __asan_stack_free_0: push $0 -/ fallthrough + jmp OnStackFree .endfn __asan_stack_free_0,globl -OnStackFree: - pop %rdx - .globl __asan_stack_free - .weak __asan_stack_free - ezlea __asan_stack_free,ax - jmp OnAsan - .endfn OnStackFree __asan_stack_free_1: push $1 jmp OnStackFree @@ -198,20 +196,17 @@ __asan_stack_free_9: .endfn __asan_stack_free_9,globl __asan_stack_free_10: push $10 - jmp 
OnStackFree +/ 𝑠𝑙𝑖𝑑𝑒 .endfn __asan_stack_free_10,globl +OnStackFree: + pop %rdx + jmp __asan_stack_free + .endfn OnStackFree __asan_stack_malloc_0: push $0 -/ fallthrough + jmp OnStackMalloc .endfn __asan_stack_malloc_0,globl -OnStackMalloc: - pop %rsi - .globl __asan_stack_malloc - .weak __asan_stack_malloc - ezlea __asan_stack_malloc,ax - jmp OnAsan - .endfn OnStackMalloc __asan_stack_malloc_1: push $1 jmp OnStackMalloc @@ -250,11 +245,54 @@ __asan_stack_malloc_9: .endfn __asan_stack_malloc_9,globl __asan_stack_malloc_10: push $10 - jmp OnStackMalloc +/ 𝑠𝑙𝑖𝑑𝑒 .endfn __asan_stack_malloc_10,globl +OnStackMalloc: + pop %rsi + jmp __asan_stack_malloc + .endfn OnStackMalloc + +__asan_handle_no_return: + ret + .endfn __asan_handle_no_return,globl + +__asan_before_dynamic_init: + ret + .endfn __asan_before_dynamic_init,globl + +__asan_after_dynamic_init: + ret + .endfn __asan_after_dynamic_init,globl + +__asan_unregister_globals: + ret + .endfn __asan_unregister_globals,globl + +__asan_version_mismatch_check_v8: + ret + .endfn __asan_version_mismatch_check_v8,globl + +/ Initializes Address Sanitizer runtime earlier if linked. 
+ .init.start 301,_init_asan + push %rdi + push %rsi + mov %r12,%rdi + mov %r13,%rsi + mov %r14,%rdx + mov %r15,%rcx + call __asan_init + pop %rsi + pop %rdi + .init.end 301,_init_asan .rodata.cst4 __asan_option_detect_stack_use_after_return: .long 1 .endobj __asan_option_detect_stack_use_after_return,globl .previous + + .bss +noreentry: + .byte 0 + .endobj noreentry + .previous diff --git a/libc/log/ubsanjmp.S b/libc/log/somanyubsan.S similarity index 100% rename from libc/log/ubsanjmp.S rename to libc/log/somanyubsan.S diff --git a/libc/math/math.mk b/libc/math/math.mk index 0730fbcd..e507cba7 100644 --- a/libc/math/math.mk +++ b/libc/math/math.mk @@ -6,9 +6,7 @@ PKGS += LIBC_MATH LIBC_MATH_ARTIFACTS += LIBC_MATH_A LIBC_MATH = $(LIBC_MATH_A_DEPS) $(LIBC_MATH_A) LIBC_MATH_A = o/$(MODE)/libc/math/math.a -LIBC_MATH_A_FILES := \ - $(wildcard libc/math/*) \ - $(wildcard libc/math/delegates/*) +LIBC_MATH_A_FILES := $(wildcard libc/math/*) LIBC_MATH_A_SRCS_A = $(filter %.s,$(LIBC_MATH_A_FILES)) LIBC_MATH_A_SRCS_S = $(filter %.S,$(LIBC_MATH_A_FILES)) LIBC_MATH_A_SRCS_C = $(filter %.c,$(LIBC_MATH_A_FILES)) diff --git a/libc/mem/free-cxx.S b/libc/mem/cxx/free.S similarity index 64% rename from libc/mem/free-cxx.S rename to libc/mem/cxx/free.S index a9f60917..af97b83d 100644 --- a/libc/mem/free-cxx.S +++ b/libc/mem/cxx/free.S @@ -26,34 +26,61 @@ / @param %rsi is ignored / @param %rdx is ignored _ZdlPvSt11align_val_tRKSt9nothrow_t: +/ operator delete(void*, std::align_val_t, std::nothrow_t const&) nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZdlPvSt11align_val_tRKSt9nothrow_t,weak _ZdaPvSt11align_val_tRKSt9nothrow_t: +/ operator delete[](void*, std::align_val_t, std::nothrow_t const&) nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZdaPvSt11align_val_tRKSt9nothrow_t,weak _ZdlPvRKSt9nothrow_t: +/ operator delete(void*, std::nothrow_t const&) nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZdlPvRKSt9nothrow_t,weak _ZdaPvRKSt9nothrow_t: +/ operator delete[](void*, std::nothrow_t const&) nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZdaPvRKSt9nothrow_t,weak 
_ZdlPvmSt11align_val_t: +/ operator delete(void*, unsigned long, std::align_val_t) nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZdlPvmSt11align_val_t,weak _ZdaPvmSt11align_val_t: +/ operator delete[](void*, unsigned long, std::align_val_t) nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZdaPvmSt11align_val_t,weak _ZdlPvSt11align_val_t: +/ operator delete(void*, std::align_val_t) nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZdlPvSt11align_val_t,weak _ZdaPvSt11align_val_t: +/ operator delete[](void*, std::align_val_t) nop -_ZdaPvm:nop -_ZdlPvm:nop -_ZdaPv: nop -_ZdlPv: jmp *hook$free(%rip) - .endfn _ZdlPv,globl,weak - .endfn _ZdaPv,globl,weak - .endfn _ZdaPvm,globl,weak - .endfn _ZdlPvm,globl,weak - .endfn _ZdaPvRKSt9nothrow_t,globl,weak - .endfn _ZdlPvRKSt9nothrow_t,globl,weak - .endfn _ZdaPvSt11align_val_t,globl,weak - .endfn _ZdlPvSt11align_val_t,globl,weak - .endfn _ZdaPvmSt11align_val_t,globl,weak - .endfn _ZdlPvmSt11align_val_t,globl,weak - .endfn _ZdlPvSt11align_val_tRKSt9nothrow_t,globl,weak - .endfn _ZdaPvSt11align_val_tRKSt9nothrow_t,globl,weak +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZdaPvSt11align_val_t,weak +_ZdaPvm: +/ operator delete[](void*, unsigned long): + nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZdaPvm,weak +_ZdlPvm: +/ operator delete(void*, unsigned long) + nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZdlPvm,weak +_ZdaPv: +/ operator delete[](void*) + nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZdaPv,weak +_ZdlPv: +/ operator delete(void*) + jmp *hook$free(%rip) + .endfn _ZdlPv,weak diff --git a/libc/mem/malloc-cxx.S b/libc/mem/cxx/malloc.S similarity index 85% rename from libc/mem/malloc-cxx.S rename to libc/mem/cxx/malloc.S index 843d0b54..318e1f11 100644 --- a/libc/mem/malloc-cxx.S +++ b/libc/mem/cxx/malloc.S @@ -26,15 +26,24 @@ / @param %rsi is ignored / @return new memory or NULL on OOM _ZnamRKSt9nothrow_t: +/ operator new[](unsigned long, std::nothrow_t const&) nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZnamRKSt9nothrow_t,weak _ZnwmRKSt9nothrow_t: +/ operator new(unsigned long, std::nothrow_t const&) nop -_Znam: nop -_Znwm: test %rdi,%rdi +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZnwmRKSt9nothrow_t,weak +_Znam: +/ operator 
new[](unsigned long) + nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _Znam,weak +_Znwm: +/ operator new(unsigned long) + test %rdi,%rdi jne 1f mov $1,%edi 1: jmp *hook$malloc(%rip) - .endfn _Znwm,globl,weak - .endfn _Znam,globl,weak - .endfn _ZnwmRKSt9nothrow_t,globl,weak - .endfn _ZnamRKSt9nothrow_t,globl,weak + .endfn _Znwm,weak diff --git a/libc/mem/memalign-cxx.S b/libc/mem/cxx/memalign.S similarity index 83% rename from libc/mem/memalign-cxx.S rename to libc/mem/cxx/memalign.S index fcbcfc86..7b5ded02 100644 --- a/libc/mem/memalign-cxx.S +++ b/libc/mem/cxx/memalign.S @@ -27,12 +27,22 @@ / @param %rdx is ignored / @return new memory or NULL on OOM _ZnamSt11align_val_tRKSt9nothrow_t: +/ operator new[](unsigned long, std::align_val_t, std::nothrow_t const&) nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZnamSt11align_val_tRKSt9nothrow_t,weak _ZnwmSt11align_val_tRKSt9nothrow_t: +/ operator new(unsigned long, std::align_val_t, std::nothrow_t const&) nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZnwmSt11align_val_tRKSt9nothrow_t,weak _ZnwmSt11align_val_t: +/ operator new(unsigned long, std::align_val_t) nop +/ 𝑠𝑙𝑖𝑑𝑒 + .endfn _ZnwmSt11align_val_t,weak _ZnamSt11align_val_t: +/ operator new[](unsigned long, std::align_val_t) test %rdi,%rdi jnz 1f mov $1,%eax @@ -41,7 +51,4 @@ _ZnamSt11align_val_t: cmovb %rax,%rsi xchg %rdi,%rsi jmp *hook$memalign(%rip) - .endfn _ZnwmSt11align_val_t,globl,weak - .endfn _ZnamSt11align_val_t,globl,weak - .endfn _ZnamSt11align_val_tRKSt9nothrow_t,globl,weak - .endfn _ZnwmSt11align_val_tRKSt9nothrow_t,globl,weak + .endfn _ZnamSt11align_val_t,weak diff --git a/libc/mem/calloc-hook.S b/libc/mem/hook/calloc.S similarity index 96% rename from libc/mem/calloc-hook.S rename to libc/mem/hook/calloc.S index 52662a15..02ecea2a 100644 --- a/libc/mem/calloc-hook.S +++ b/libc/mem/hook/calloc.S @@ -20,15 +20,15 @@ #include "libc/macros.h" .source __FILE__ - .initbss 800,_init_calloc + .initbss 202,_init_calloc hook$calloc: .quad 0 .endobj hook$calloc,globl,hidden .previous - .init.start 800,_init_calloc + .init.start 
202,_init_calloc .hidden dlcalloc ezlea dlcalloc,ax stosq yoink free - .init.end 800,_init_calloc + .init.end 202,_init_calloc diff --git a/libc/mem/free-hook.S b/libc/mem/hook/free.S similarity index 96% rename from libc/mem/free-hook.S rename to libc/mem/hook/free.S index 55fa71e8..e91bef46 100644 --- a/libc/mem/free-hook.S +++ b/libc/mem/hook/free.S @@ -20,16 +20,16 @@ #include "libc/macros.h" .source __FILE__ - .initbss 800,_init_free + .initbss 202,_init_free hook$free: .quad 0 .endobj hook$free,globl,hidden .previous - .init.start 800,_init_free + .init.start 202,_init_free ezlea dlfree,ax stosq yoink realloc - .init.end 800,_init_free + .init.end 202,_init_free .hidden dlfree diff --git a/libc/mem/hook/hook.h b/libc/mem/hook/hook.h new file mode 100644 index 00000000..b9c2b8fe --- /dev/null +++ b/libc/mem/hook/hook.h @@ -0,0 +1,18 @@ +#ifndef COSMOPOLITAN_LIBC_MEM_HOOK_HOOK_H_ +#define COSMOPOLITAN_LIBC_MEM_HOOK_HOOK_H_ +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +extern void (*hook$free)(void *); +extern void *(*hook$malloc)(size_t); +extern void *(*hook$calloc)(size_t, size_t); +extern void *(*hook$memalign)(size_t, size_t); +extern void *(*hook$realloc)(void *, size_t); +extern void *(*hook$realloc_in_place)(void *, size_t); +extern void *(*hook$valloc)(size_t); +extern void *(*hook$pvalloc)(size_t); +extern size_t (*hook$malloc_usable_size)(const void *); + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_LIBC_MEM_HOOK_HOOK_H_ */ diff --git a/libc/mem/malloc-hook.S b/libc/mem/hook/malloc.S similarity index 96% rename from libc/mem/malloc-hook.S rename to libc/mem/hook/malloc.S index c0e8bf10..0fb00f6e 100644 --- a/libc/mem/malloc-hook.S +++ b/libc/mem/hook/malloc.S @@ -20,16 +20,15 @@ #include "libc/macros.h" .source __FILE__ - .initbss 800,_init_malloc + .initbss 202,_init_malloc hook$malloc: .quad 0 .endobj hook$malloc,globl,hidden .previous - .init.start 800,_init_malloc + .init.start 
202,_init_malloc + .hidden dlmalloc ezlea dlmalloc,ax stosq yoink free - .init.end 800,_init_malloc - - .hidden dlmalloc + .init.end 202,_init_malloc diff --git a/libc/mem/hook/malloc_usable_size.S b/libc/mem/hook/malloc_usable_size.S new file mode 100644 index 00000000..276aad7c --- /dev/null +++ b/libc/mem/hook/malloc_usable_size.S @@ -0,0 +1,33 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. 
│ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" +.source __FILE__ + + .initbss 202,_init_malloc_usable_size +hook$malloc_usable_size: + .quad 0 + .endobj hook$malloc_usable_size,globl,hidden + .previous + + .init.start 202,_init_malloc_usable_size + .hidden dlmalloc_usable_size + ezlea dlmalloc_usable_size,ax + stosq + .init.end 202,_init_malloc_usable_size diff --git a/libc/mem/memalign-hook.S b/libc/mem/hook/memalign.S similarity index 95% rename from libc/mem/memalign-hook.S rename to libc/mem/hook/memalign.S index 0acdd540..b94a4926 100644 --- a/libc/mem/memalign-hook.S +++ b/libc/mem/hook/memalign.S @@ -20,16 +20,15 @@ #include "libc/macros.h" .source __FILE__ - .initbss 800,_init_memalign + .initbss 202,_init_memalign hook$memalign: .quad 0 .endobj hook$memalign,globl,hidden .previous - .init.start 800,_init_memalign + .init.start 202,_init_memalign + .hidden dlmemalign ezlea dlmemalign,ax stosq yoink free - .init.end 800,_init_memalign - - .hidden dlmemalign + .init.end 202,_init_memalign diff --git a/libc/mem/hook/posix_memalign.S b/libc/mem/hook/posix_memalign.S new file mode 100644 index 00000000..c7374516 --- /dev/null +++ b/libc/mem/hook/posix_memalign.S @@ -0,0 +1,34 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. 
│ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" +.source __FILE__ + + .initbss 202,_init_posix_memalign +hook$posix_memalign: + .quad 0 + .endobj hook$posix_memalign,globl,hidden + .previous + + .init.start 202,_init_posix_memalign + .hidden dlposix_memalign + ezlea dlposix_memalign,ax + stosq + yoink free + .init.end 202,_init_posix_memalign diff --git a/libc/mem/hook/pvalloc.S b/libc/mem/hook/pvalloc.S new file mode 100644 index 00000000..c2c45899 --- /dev/null +++ b/libc/mem/hook/pvalloc.S @@ -0,0 +1,34 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. 
│ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" +.source __FILE__ + + .initbss 202,_init_pvalloc +hook$pvalloc: + .quad 0 + .endobj hook$pvalloc,globl,hidden + .previous + + .init.start 202,_init_pvalloc + .hidden dlpvalloc + ezlea dlpvalloc,ax + stosq + yoink free + .init.end 202,_init_pvalloc diff --git a/libc/mem/realloc-hook.S b/libc/mem/hook/realloc.S similarity index 95% rename from libc/mem/realloc-hook.S rename to libc/mem/hook/realloc.S index ff31fe2e..b87067e3 100644 --- a/libc/mem/realloc-hook.S +++ b/libc/mem/hook/realloc.S @@ -20,15 +20,15 @@ #include "libc/macros.h" .source __FILE__ - .initbss 800,_init_realloc + .initbss 202,_init_realloc hook$realloc: .quad 0 .endobj hook$realloc,globl,hidden .previous - .init.start 800,_init_realloc + .init.start 202,_init_realloc .hidden dlrealloc ezlea dlrealloc,ax stosq yoink free - .init.end 800,_init_realloc + .init.end 202,_init_realloc diff --git a/libc/mem/hook/realloc_in_place.S b/libc/mem/hook/realloc_in_place.S new file mode 100644 index 00000000..db02e6ab --- /dev/null +++ b/libc/mem/hook/realloc_in_place.S @@ -0,0 +1,34 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. 
│ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" +.source __FILE__ + + .initbss 202,_init_realloc_in_place +hook$realloc_in_place: + .quad 0 + .endobj hook$realloc_in_place,globl,hidden + .previous + + .init.start 202,_init_realloc_in_place + .hidden dlrealloc_in_place + ezlea dlrealloc_in_place,ax + stosq + yoink free + .init.end 202,_init_realloc_in_place diff --git a/libc/mem/hook/valloc.S b/libc/mem/hook/valloc.S new file mode 100644 index 00000000..845c8c9f --- /dev/null +++ b/libc/mem/hook/valloc.S @@ -0,0 +1,34 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. 
│ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" +.source __FILE__ + + .initbss 202,_init_valloc +hook$valloc: + .quad 0 + .endobj hook$valloc,globl,hidden + .previous + + .init.start 202,_init_valloc + .hidden dlvalloc + ezlea dlvalloc,ax + stosq + yoink free + .init.end 202,_init_valloc diff --git a/libc/mem/malloc.S b/libc/mem/malloc.S index 1f6af70b..5de400ef 100644 --- a/libc/mem/malloc.S +++ b/libc/mem/malloc.S @@ -39,4 +39,3 @@ / @see dlmalloc() malloc: jmp *hook$malloc(%rip) .endfn malloc,globl - diff --git a/libc/mem/malloc_usable_size.S b/libc/mem/malloc_usable_size.S index 0a1d2aae..72f15981 100644 --- a/libc/mem/malloc_usable_size.S +++ b/libc/mem/malloc_usable_size.S @@ -40,14 +40,3 @@ malloc_usable_size: jmp *hook$malloc_usable_size(%rip) .endfn malloc_usable_size,globl - - .initbss 800,_init_malloc_usable_size -hook$malloc_usable_size: - .quad 0 - .endobj hook$malloc_usable_size,globl,hidden - .previous - - .init.start 800,_init_malloc_usable_size - ezlea dlmalloc_usable_size,ax - stosq - .init.end 800,_init_malloc_usable_size diff --git a/libc/mem/mem.mk b/libc/mem/mem.mk index edcfc1b0..08a3d0d3 100644 --- a/libc/mem/mem.mk +++ b/libc/mem/mem.mk @@ -6,11 +6,15 @@ PKGS += LIBC_MEM LIBC_MEM_ARTIFACTS += LIBC_MEM_A LIBC_MEM = $(LIBC_MEM_A_DEPS) $(LIBC_MEM_A) LIBC_MEM_A = o/$(MODE)/libc/mem/mem.a -LIBC_MEM_A_FILES := $(wildcard libc/mem/*) LIBC_MEM_A_HDRS = $(filter %.h,$(LIBC_MEM_A_FILES)) LIBC_MEM_A_SRCS_S = $(filter %.S,$(LIBC_MEM_A_FILES)) LIBC_MEM_A_SRCS_C = $(filter %.c,$(LIBC_MEM_A_FILES)) +LIBC_MEM_A_FILES := \ + $(wildcard libc/mem/*) \ + $(wildcard libc/mem/cxx/*) \ + $(wildcard libc/mem/hook/*) + LIBC_MEM_A_SRCS = \ $(LIBC_MEM_A_SRCS_S) \ 
$(LIBC_MEM_A_SRCS_C) diff --git a/libc/mem/posix_memalign.S b/libc/mem/posix_memalign.S index 5588342c..95a66c04 100644 --- a/libc/mem/posix_memalign.S +++ b/libc/mem/posix_memalign.S @@ -35,17 +35,3 @@ posix_memalign: jmp *hook$posix_memalign(%rip) .endfn posix_memalign,globl - - .initbss 800,_init_posix_memalign -hook$posix_memalign: - .quad 0 - .endobj hook$posix_memalign,globl,hidden - .previous - - .init.start 800,_init_posix_memalign - .hidden dlposix_memalign - ezlea dlposix_memalign,ax - stosq - .init.end 800,_init_posix_memalign - - yoink free diff --git a/libc/mem/pvalloc.S b/libc/mem/pvalloc.S index fc2ea229..09906cbe 100644 --- a/libc/mem/pvalloc.S +++ b/libc/mem/pvalloc.S @@ -22,24 +22,10 @@ .source __FILE__ / Equivalent to valloc(minimum-page-that-holds(n)), that is, -/ round up n to nearest PAGESIZE. +/ round up n to nearest pagesize. / / @param rdi is number of bytes needed / @return rax is memory address, or NULL w/ errno / @see dlpvalloc() pvalloc:jmp *hook$pvalloc(%rip) .endfn pvalloc,globl - - .initbss 800,_init_pvalloc -hook$pvalloc: - .quad 0 - .endobj hook$pvalloc,globl,hidden - .previous - - .init.start 800,_init_pvalloc - .hidden dlpvalloc - ezlea dlpvalloc,ax - stosq - .init.end 800,_init_pvalloc - - yoink free diff --git a/libc/mem/realloc.S b/libc/mem/realloc.S index c9cdcaaf..be88cfca 100644 --- a/libc/mem/realloc.S +++ b/libc/mem/realloc.S @@ -55,5 +55,6 @@ / @note realloc(p=0, n=0) → malloc(32) / @note realloc(p≠0, n=0) → free(p) / @see dlrealloc() -realloc:jmp *hook$realloc(%rip) +realloc: + jmp *hook$realloc(%rip) .endfn realloc,globl diff --git a/libc/mem/realloc_in_place.S b/libc/mem/realloc_in_place.S index 1709b7d8..492104d6 100644 --- a/libc/mem/realloc_in_place.S +++ b/libc/mem/realloc_in_place.S @@ -23,12 +23,12 @@ / Resizes the space allocated for p to size n, only if this can be / done without moving p (i.e., only if there is adjacent space -/ available if n is greater than p's current allocated size, or n is -/ less than 
or equal to p's size). This may be used instead of plain -/ realloc if an alternative allocation strategy is needed upon failure -/ to expand space, for example, reallocation of a buffer that must be -/ memory-aligned or cleared. You can use realloc_in_place to trigger -/ these alternatives only when needed. +/ available if n is greater than p's current allocated size, or n +/ is less than or equal to p's size). This may be used instead of +/ plain realloc if an alternative allocation strategy is needed +/ upon failure to expand space, for example, reallocation of a +/ buffer that must be memory-aligned or cleared. You can use +/ realloc_in_place to trigger these alternatives only when needed. / / @param rdi (p) is address of current allocation / @param rsi (newsize) is number of bytes needed @@ -37,17 +37,3 @@ realloc_in_place: jmp *hook$realloc_in_place(%rip) .endfn realloc_in_place,globl - - .initbss 800,_init_realloc_in_place -hook$realloc_in_place: - .quad 0 - .endobj hook$realloc_in_place,globl,hidden - .previous - - .init.start 800,_init_realloc_in_place - .hidden dlrealloc_in_place - ezlea dlrealloc_in_place,ax - stosq - .init.end 800,_init_realloc_in_place - - yoink free diff --git a/libc/mem/valloc.S b/libc/mem/valloc.S index 9b4a6e86..3056679d 100644 --- a/libc/mem/valloc.S +++ b/libc/mem/valloc.S @@ -21,24 +21,10 @@ #include "libc/notice.inc" .source __FILE__ -/ Equivalent to memalign(PAGESIZE, n). +/ Equivalent to memalign(4096, n). 
/ / @param rdi is number of bytes needed / @return rax is memory address, or NULL w/ errno / @see dlvalloc() valloc: jmp *hook$valloc(%rip) .endfn valloc,globl - - .initbss 800,_init_valloc -hook$valloc: - .quad 0 - .endobj hook$valloc,globl,hidden - .previous - - .init.start 800,_init_valloc - .hidden dlvalloc - ezlea dlvalloc,ax - stosq - .init.end 800,_init_valloc - - yoink free diff --git a/libc/nexgen32e/djbsort-avx2.S b/libc/nexgen32e/djbsort-avx2.S index 438e1072..a263a3dc 100644 --- a/libc/nexgen32e/djbsort-avx2.S +++ b/libc/nexgen32e/djbsort-avx2.S @@ -1,2206 +1,420 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. 
│ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.h" .source __FILE__ - .p2align 4 -minmax_vector: - cmp $7,%rdx - jle .L27 - test $7,%dl - je .L5 - lea -32(,%rdx,4),%rax - lea (%rdi,%rax),%rcx - add %rsi,%rax - vmovdqu (%rax),%ymm0 - vmovdqu (%rcx),%ymm1 - and $-8,%rdx - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm2,(%rcx) - vmovdqu %ymm0,(%rax) - .p2align 4,,10 - .p2align 3 -.L5: vmovdqu (%rsi),%ymm1 - vmovdqu (%rdi),%ymm0 - add $32,%rsi - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm2,(%rdi) - vmovdqu %ymm0,-32(%rsi) - add $32,%rdi - sub $8,%rdx - jne .L5 - vzeroupper -.L25: ret - .p2align 4,,10 - .p2align 3 -.L27: test %rdx,%rdx - jle .L25 - mov (%rdi),%eax - cmp (%rsi),%eax - cmovg (%rsi),%ecx - cmovg %eax,%eax - mov %ecx,(%rdi) - mov %eax,(%rsi) - cmp $1,%rdx - je .L25 - mov 4(%rdi),%eax - cmp 4(%rsi),%eax - cmovg 4(%rsi),%ecx - cmovg %eax,%eax - mov %ecx,4(%rdi) - mov %eax,4(%rsi) - cmp $2,%rdx - je .L25 - mov 8(%rdi),%eax - cmp 8(%rsi),%eax - cmovg 8(%rsi),%ecx - cmovg %eax,%eax - mov %ecx,8(%rdi) - mov %eax,8(%rsi) - cmp $3,%rdx - je .L25 - mov 12(%rdi),%eax - cmp 12(%rsi),%eax - cmovg 12(%rsi),%ecx - cmovg %eax,%eax - mov %ecx,12(%rdi) - mov %eax,12(%rsi) - cmp $4,%rdx - je .L25 - mov 16(%rdi),%eax - cmp 16(%rsi),%eax - cmovg 16(%rsi),%ecx - cmovg %eax,%eax - mov %ecx,16(%rdi) - mov %eax,16(%rsi) - cmp $5,%rdx - je .L25 - mov 20(%rdi),%eax - cmp 20(%rsi),%eax - cmovg 20(%rsi),%ecx - cmovg %eax,%eax - mov %ecx,20(%rdi) - mov %eax,20(%rsi) - cmp $7,%rdx - jne .L25 - mov 24(%rdi),%eax - cmp 24(%rsi),%eax - cmovg 24(%rsi),%edx - cmovg %eax,%eax - mov %edx,24(%rdi) - mov %eax,24(%rsi) - ret - .endfn minmax_vector,globl - - .p2align 4 
-int32_twostages_32: - test %rsi,%rsi - jle .L33 - lea -128(%rsi),%rax - dec %rsi - and $-128,%rsi - mov %rax,%rdx - sub %rsi,%rdx - jmp .L30 - .p2align 4,,10 - .p2align 3 -.L34: add $-128,%rax -.L30: vmovdqu 256(%rdi),%ymm1 - vmovdqu (%rdi),%ymm0 - vmovdqu 384(%rdi),%ymm4 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu 128(%rdi),%ymm1 - add $512,%rdi - vpminsd %ymm4,%ymm1,%ymm3 - vpmaxsd %ymm4,%ymm1,%ymm1 - vpminsd %ymm3,%ymm2,%ymm4 - vpmaxsd %ymm3,%ymm2,%ymm2 - vpminsd %ymm1,%ymm0,%ymm3 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm0,-128(%rdi) - vmovdqu -224(%rdi),%ymm1 - vmovdqu -480(%rdi),%ymm0 - vmovdqu %ymm4,-512(%rdi) - vmovdqu %ymm2,-384(%rdi) - vmovdqu -96(%rdi),%ymm4 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu -352(%rdi),%ymm1 - vmovdqu %ymm3,-256(%rdi) - vpminsd %ymm4,%ymm1,%ymm3 - vpmaxsd %ymm4,%ymm1,%ymm1 - vpminsd %ymm3,%ymm2,%ymm4 - vpmaxsd %ymm3,%ymm2,%ymm2 - vpminsd %ymm1,%ymm0,%ymm3 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm0,-96(%rdi) - vmovdqu -192(%rdi),%ymm1 - vmovdqu -448(%rdi),%ymm0 - vmovdqu %ymm4,-480(%rdi) - vmovdqu %ymm2,-352(%rdi) - vmovdqu -64(%rdi),%ymm4 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu -320(%rdi),%ymm1 - vmovdqu %ymm3,-224(%rdi) - vpminsd %ymm4,%ymm1,%ymm3 - vpmaxsd %ymm4,%ymm1,%ymm1 - vpminsd %ymm3,%ymm2,%ymm4 - vpmaxsd %ymm3,%ymm2,%ymm2 - vpminsd %ymm1,%ymm0,%ymm3 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm0,-64(%rdi) - vmovdqu -160(%rdi),%ymm1 - vmovdqu -416(%rdi),%ymm0 - vmovdqu %ymm4,-448(%rdi) - vmovdqu %ymm2,-320(%rdi) - vmovdqu -32(%rdi),%ymm4 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu -288(%rdi),%ymm1 - vmovdqu %ymm3,-192(%rdi) - vpminsd %ymm4,%ymm1,%ymm3 - vpmaxsd %ymm4,%ymm1,%ymm1 - vpminsd %ymm3,%ymm2,%ymm4 - vpmaxsd %ymm3,%ymm2,%ymm2 - vpminsd %ymm1,%ymm0,%ymm3 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm4,-416(%rdi) - vmovdqu %ymm2,-288(%rdi) - vmovdqu %ymm3,-160(%rdi) - vmovdqu %ymm0,-32(%rdi) - cmp %rdx,%rax - jne .L34 - 
vzeroupper -.L33: ret - .endfn int32_twostages_32,globl - - .p2align 4 -int32_threestages: - push %rbp - mov %rsp,%rbp - push %r15 - push %r14 - lea 0(,%rdx,8),%r14 - push %r13 - push %r12 - push %rbx - and $-32,%rsp - sub $32,%rsp - mov %rsi,16(%rsp) - cmp %r14,%rsi - jl .L41 - lea -1(%rdx),%rax - and $-8,%rax - lea (%rdx,%rdx),%r8 - mov %rax,8(%rsp) - lea (%r8,%rdx),%rcx - lea 0(,%rdx,4),%rsi - mov %r14,%r9 - mov %rdi,%r13 - lea (%rsi,%rdx),%r11 - lea (%rcx,%rcx),%r10 - sub %rdx,%r9 - xor %r12d,%r12d - mov %r14,%rbx - lea 32(%rdi),%r15 - .p2align 4,,10 - .p2align 3 -.L37: mov %r12,%rdi - lea (%rdx,%rdi),%rax - mov %rbx,24(%rsp) - mov %rbx,%r12 - cmp %rax,%rdi - jge .L40 - lea 0(%r13,%rdi,4),%rax - add 8(%rsp),%rdi - lea (%r15,%rdi,4),%rdi - .p2align 4,,10 - .p2align 3 -.L38: vmovdqu (%rax,%rsi,4),%ymm0 - vmovdqu (%rax),%ymm6 - vmovdqu (%rax,%rdx,4),%ymm1 - vpminsd %ymm0,%ymm6,%ymm7 - vpmaxsd %ymm0,%ymm6,%ymm6 - vmovdqu (%rax,%r11,4),%ymm0 - vmovdqu (%rax,%r9,4),%ymm8 - vpmaxsd %ymm0,%ymm1,%ymm3 - vpminsd %ymm0,%ymm1,%ymm2 - vmovdqu (%rax,%r10,4),%ymm1 - vmovdqu (%rax,%r8,4),%ymm0 - vpminsd %ymm1,%ymm0,%ymm4 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu (%rax,%rcx,4),%ymm1 - vpminsd %ymm8,%ymm1,%ymm5 - vpmaxsd %ymm8,%ymm1,%ymm1 - vpminsd %ymm4,%ymm7,%ymm8 - vpmaxsd %ymm4,%ymm7,%ymm4 - vpminsd %ymm5,%ymm2,%ymm7 - vpmaxsd %ymm5,%ymm2,%ymm2 - vpminsd %ymm0,%ymm6,%ymm5 - vpmaxsd %ymm0,%ymm6,%ymm0 - vpminsd %ymm1,%ymm3,%ymm6 - vpmaxsd %ymm1,%ymm3,%ymm1 - vpminsd %ymm7,%ymm8,%ymm9 - vpmaxsd %ymm7,%ymm8,%ymm3 - vpminsd %ymm2,%ymm4,%ymm8 - vpminsd %ymm6,%ymm5,%ymm7 - vpmaxsd %ymm2,%ymm4,%ymm2 - vpmaxsd %ymm6,%ymm5,%ymm5 - vpminsd %ymm1,%ymm0,%ymm4 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm9,(%rax) - vmovdqu %ymm3,(%rax,%rdx,4) - vmovdqu %ymm8,(%rax,%r8,4) - vmovdqu %ymm2,(%rax,%rcx,4) - vmovdqu %ymm7,(%rax,%rsi,4) - vmovdqu %ymm5,(%rax,%r11,4) - vmovdqu %ymm4,(%rax,%r10,4) - vmovdqu %ymm0,(%rax,%r9,4) - add $32,%rax - cmp %rax,%rdi - jne .L38 -.L40: add %r14,%rbx - cmp 
%rbx,16(%rsp) - jge .L37 - vzeroupper -.L35: mov 24(%rsp),%rax - lea -40(%rbp),%rsp - pop %rbx - pop %r12 - pop %r13 - pop %r14 - pop %r15 - pop %rbp - ret -.L41: movq $0,24(%rsp) - jmp .L35 - .endfn int32_threestages,globl - - .p2align 4 -merge16_finish: - vpminsd %ymm1,%ymm0,%ymm3 - vpmaxsd %ymm1,%ymm0,%ymm0 - vperm2i128 $32,%ymm0,%ymm3,%ymm2 - vperm2i128 $49,%ymm0,%ymm3,%ymm0 - vpminsd %ymm0,%ymm2,%ymm1 - vpmaxsd %ymm0,%ymm2,%ymm0 - vpunpcklqdq %ymm0,%ymm1,%ymm2 - vpunpckhqdq %ymm0,%ymm1,%ymm0 - vpminsd %ymm0,%ymm2,%ymm1 - vpmaxsd %ymm0,%ymm2,%ymm2 - vpunpckldq %ymm2,%ymm1,%ymm0 - vpunpckhdq %ymm2,%ymm1,%ymm1 - vpunpcklqdq %ymm1,%ymm0,%ymm3 - vpunpckhqdq %ymm1,%ymm0,%ymm0 - vpminsd %ymm3,%ymm0,%ymm2 - vpmaxsd %ymm3,%ymm0,%ymm0 - vpunpckldq %ymm0,%ymm2,%ymm1 - vpunpckhdq %ymm0,%ymm2,%ymm0 - vperm2i128 $32,%ymm0,%ymm1,%ymm2 - vperm2i128 $49,%ymm0,%ymm1,%ymm0 - test %esi,%esi - je .L46 - vpcmpeqd %ymm1,%ymm1,%ymm1 - vpxor %ymm1,%ymm2,%ymm2 - vpxor %ymm1,%ymm0,%ymm0 -.L46: vmovdqu %ymm2,(%rdi) - vmovdqu %ymm0,32(%rdi) - ret - .endfn merge16_finish,globl - - .p2align 4 -djbsort$avx2_2power: - push %r13 - mov %rdi,%r11 - lea 16(%rsp),%r13 - and $-32,%rsp - push -8(%r13) - push %rbp - mov %rsp,%rbp - push %r15 - push %r14 - push %r13 - push %r12 - push %rbx - sub $200,%rsp - mov %rsi,-144(%rbp) - mov %edx,-164(%rbp) - cmp $8,%rsi - je .L194 - cmpq $16,-144(%rbp) - je .L195 - cmpq $32,-144(%rbp) - je .L196 - mov %rsi,%r15 - sar $3,%r15 - test %r15,%r15 - jle .L197 - lea -1(%r15),%rbx - mov %rbx,-200(%rbp) - shr $3,%rbx - mov %rbx,%rdx - lea 32(%r11),%rbx - lea (%r15,%r15),%r8 - mov %rbx,-120(%rbp) - lea 0(,%r15,4),%rsi - lea (%r8,%r15),%rdi - lea 0(,%r15,8),%rcx - sal $5,%rdx - lea (%rdi,%rdi),%r10 - lea (%rsi,%r15),%r9 - sub %r15,%rcx - mov %r11,%rax - add %rbx,%rdx -.L61: vmovdqu (%rax),%ymm0 - vmovdqu (%rax,%rsi,4),%ymm2 - vmovdqu (%rax,%r10,4),%ymm3 - vpminsd %ymm2,%ymm0,%ymm4 - vpmaxsd %ymm2,%ymm0,%ymm2 - vmovdqu (%rax,%r8,4),%ymm0 - vpminsd %ymm3,%ymm0,%ymm1 - 
vpmaxsd %ymm3,%ymm0,%ymm0 - vpminsd %ymm2,%ymm0,%ymm3 - vpmaxsd %ymm2,%ymm0,%ymm0 - vpminsd %ymm4,%ymm1,%ymm2 - vpmaxsd %ymm4,%ymm1,%ymm1 - vpminsd %ymm1,%ymm3,%ymm4 - vpmaxsd %ymm1,%ymm3,%ymm1 - vmovdqu %ymm0,(%rax) - vmovdqu %ymm4,(%rax,%r8,4) - vmovdqu %ymm1,(%rax,%rsi,4) - vmovdqu %ymm2,(%rax,%r10,4) - vmovdqu (%rax,%r15,4),%ymm2 - vmovdqu (%rax,%r9,4),%ymm0 - vmovdqu (%rax,%rdi,4),%ymm4 - vpminsd %ymm2,%ymm0,%ymm1 - vpmaxsd %ymm2,%ymm0,%ymm0 - vmovdqu (%rax,%rcx,4),%ymm2 - vpminsd %ymm4,%ymm2,%ymm3 - vpmaxsd %ymm4,%ymm2,%ymm2 - vpminsd %ymm3,%ymm1,%ymm4 - vpmaxsd %ymm3,%ymm1,%ymm1 - vpminsd %ymm2,%ymm0,%ymm3 - vpmaxsd %ymm2,%ymm0,%ymm0 - vpminsd %ymm1,%ymm3,%ymm2 - vpmaxsd %ymm1,%ymm3,%ymm1 - vmovdqu %ymm4,(%rax,%r15,4) - vmovdqu %ymm1,(%rax,%rdi,4) - vmovdqu %ymm2,(%rax,%r9,4) - vmovdqu %ymm0,(%rax,%rcx,4) - add $32,%rax - cmp %rdx,%rax - jne .L61 -.L62: lea 0(,%r15,8),%rax - sub %r15,%rax - lea (%r15,%r15),%r12 - mov %rax,%r9 - mov -144(%rbp),%rax - lea 0(,%r15,4),%rbx - lea (%r12,%r15),%r13 - lea (%rbx,%r15),%r10 - lea (%r13,%r13),%r14 - cmp $127,%rax - jg .L59 - lea 64(%r11),%rdi - dec %rax - mov %rdi,-192(%rbp) - mov %rax,-176(%rbp) -.L60: mov -144(%rbp),%rdi - mov %r11,-208(%rbp) - lea (%r11,%rdi,4),%rax - mov %rax,-112(%rbp) - mov %rdi,%rax - sar $4,%rax - cmp $32,%rax - sete %dl - cmp $127,%rax - mov %rax,-80(%rbp) - setg %al - or %eax,%edx - mov -176(%rbp),%rax - mov %dl,-152(%rbp) - shr $4,%rax - sal $6,%rax - add -192(%rbp),%rax - mov %rax,-128(%rbp) - mov -200(%rbp),%rax - movl $3,-184(%rbp) - shr $3,%rax - sal $5,%rax - add -120(%rbp),%rax - mov %rax,-160(%rbp) - movq $4,-136(%rbp) - mov %r12,-200(%rbp) - mov %r13,-216(%rbp) - mov %r10,-224(%rbp) - mov %r9,-232(%rbp) - vmovdqa .LC1(%rip),%ymm11 - vmovdqa .LC3(%rip),%ymm10 - vmovdqa .LC2(%rip),%ymm12 - mov %rbx,-192(%rbp) - mov %rdi,%rbx -.L63: cmpq $4,-136(%rbp) - je .L198 - cmpq $2,-136(%rbp) - je .L91 - mov -112(%rbp),%rdx - mov %r11,%rax - cmp -112(%rbp),%r11 - je .L90 -.L92: vpxor 
32(%rax),%ymm10,%ymm2 - vpxor (%rax),%ymm10,%ymm1 - add $64,%rax - vperm2i128 $32,%ymm2,%ymm1,%ymm0 - vperm2i128 $49,%ymm2,%ymm1,%ymm1 - vpunpcklqdq %ymm1,%ymm0,%ymm2 - vpunpckhqdq %ymm1,%ymm0,%ymm0 - vpminsd %ymm0,%ymm2,%ymm1 - vpmaxsd %ymm0,%ymm2,%ymm2 - vpunpcklqdq %ymm2,%ymm1,%ymm0 - vpunpckhqdq %ymm2,%ymm1,%ymm1 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vperm2i128 $32,%ymm0,%ymm2,%ymm1 - vperm2i128 $49,%ymm0,%ymm2,%ymm0 - vmovdqu %ymm1,-64(%rax) - vmovdqu %ymm0,-32(%rax) - cmp %rax,%rdx - jne .L92 -.L90: cmpb $0,-152(%rbp) - mov -80(%rbp),%r12 - je .L89 - mov %rbx,%r13 - mov %r11,%rbx - .p2align 4,,10 - .p2align 3 -.L146: mov %r12,%rdx - sar $2,%rdx - mov %r13,%rsi - mov %rbx,%rdi - vzeroupper - sar $3,%r12 - call int32_threestages - cmp $127,%r12 - vmovdqa .LC1(%rip),%ymm11 - vmovdqa .LC3(%rip),%ymm10 - vmovdqa .LC2(%rip),%ymm12 - jg .L146 - cmp $32,%r12 - je .L146 - mov %rbx,%r11 - mov %r13,%rbx -.L89: cmp $15,%r12 - jle .L94 - mov -120(%rbp),%r13 - .p2align 4,,10 - .p2align 3 -.L100: mov %r12,%rdx - sar %rdx - test %rbx,%rbx - jle .L95 - lea (%rdx,%rdx),%rcx - lea -1(%rdx),%r9 - lea (%rcx,%rdx),%rsi - lea 0(,%rdx,4),%r10 - xor %r8d,%r8d - and $-8,%r9 - .p2align 4,,10 - .p2align 3 -.L96: lea (%rdx,%r8),%rax - cmp %rax,%r8 - jge .L99 - lea (%r9,%r8),%rdi - lea (%r11,%r8,4),%rax - lea 0(%r13,%rdi,4),%rdi - .p2align 4,,10 - .p2align 3 -.L97: vmovdqu (%rax,%rcx,4),%ymm1 - vmovdqu (%rax),%ymm0 - vmovdqu (%rax,%rsi,4),%ymm4 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu (%rax,%rdx,4),%ymm1 - vpminsd %ymm4,%ymm1,%ymm3 - vpmaxsd %ymm4,%ymm1,%ymm1 - vpminsd %ymm3,%ymm2,%ymm4 - vpmaxsd %ymm3,%ymm2,%ymm2 - vpminsd %ymm1,%ymm0,%ymm3 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm4,(%rax) - vmovdqu %ymm2,(%rax,%rdx,4) - vmovdqu %ymm3,(%rax,%rcx,4) - vmovdqu %ymm0,(%rax,%rsi,4) - add $32,%rax - cmp %rdi,%rax - jne .L97 -.L99: add %r10,%r8 - cmp %r8,%rbx - jg .L96 -.L95: sar $2,%r12 - cmp $15,%r12 - jg .L100 -.L94: cmp $8,%r12 - je .L101 
-.L104: mov %r11,%rax - test %r15,%r15 - jle .L103 - mov -160(%rbp),%r9 - mov -192(%rbp),%rdx - mov -200(%rbp),%rcx - mov -216(%rbp),%rsi - mov -224(%rbp),%rdi - mov -232(%rbp),%r8 - .p2align 4,,10 - .p2align 3 -.L102: vmovdqu (%rax,%r15,4),%ymm0 - vmovdqu (%rax),%ymm1 - vmovdqu (%rax,%rcx,4),%ymm2 - vmovdqu (%rax,%rdi,4),%ymm4 - vmovdqu (%rax,%rdx,4),%ymm7 - vpminsd %ymm0,%ymm1,%ymm5 - vpmaxsd %ymm0,%ymm1,%ymm1 - vmovdqu (%rax,%rsi,4),%ymm0 - vmovdqu (%rax,%r14,4),%ymm8 - vpminsd %ymm0,%ymm2,%ymm3 - vpmaxsd %ymm0,%ymm2,%ymm0 - vpminsd %ymm4,%ymm7,%ymm2 - vpmaxsd %ymm4,%ymm7,%ymm7 - vmovdqu (%rax,%r8,4),%ymm4 - vpminsd %ymm3,%ymm5,%ymm9 - vpminsd %ymm4,%ymm8,%ymm6 - vpmaxsd %ymm4,%ymm8,%ymm4 - vpmaxsd %ymm3,%ymm5,%ymm5 - vpminsd %ymm0,%ymm1,%ymm8 - vpminsd %ymm6,%ymm2,%ymm3 - vpmaxsd %ymm0,%ymm1,%ymm0 - vpmaxsd %ymm6,%ymm2,%ymm1 - vpminsd %ymm4,%ymm7,%ymm2 - vpmaxsd %ymm4,%ymm7,%ymm4 - vpminsd %ymm3,%ymm9,%ymm6 - vpminsd %ymm2,%ymm8,%ymm7 - vpmaxsd %ymm3,%ymm9,%ymm3 - vpmaxsd %ymm2,%ymm8,%ymm2 - vpminsd %ymm1,%ymm5,%ymm8 - vpmaxsd %ymm1,%ymm5,%ymm1 - vpminsd %ymm4,%ymm0,%ymm5 - vpmaxsd %ymm4,%ymm0,%ymm0 - vmovdqu %ymm6,(%rax) - vmovdqu %ymm7,(%rax,%r15,4) - vmovdqu %ymm8,(%rax,%rcx,4) - vmovdqu %ymm5,(%rax,%rsi,4) - vmovdqu %ymm3,(%rax,%rdx,4) - vmovdqu %ymm2,(%rax,%rdi,4) - vmovdqu %ymm1,(%rax,%r14,4) - vmovdqu %ymm0,(%rax,%r8,4) - add $32,%rax - cmp %rax,%r9 - jne .L102 -.L103: sarq -136(%rbp) - decl -184(%rbp) - jne .L63 - cmpq $0,-144(%rbp) - jle .L113 - mov -176(%rbp),%rax - vpcmpeqd %ymm4,%ymm4,%ymm4 - shr $6,%rax - sal $8,%rax - lea 256(%r11,%rax),%rdx - mov %r11,%rax - jmp .L112 -.L199: vpxor %ymm4,%ymm7,%ymm7 - vpxor %ymm4,%ymm2,%ymm2 - vpxor %ymm4,%ymm1,%ymm1 - vpxor %ymm4,%ymm0,%ymm0 -.L111: vperm2i128 $32,%ymm5,%ymm9,%ymm11 - vperm2i128 $32,%ymm6,%ymm10,%ymm3 - vperm2i128 $32,%ymm1,%ymm7,%ymm12 - vperm2i128 $32,%ymm0,%ymm2,%ymm8 - vperm2i128 $49,%ymm6,%ymm10,%ymm6 - vperm2i128 $49,%ymm5,%ymm9,%ymm9 - vperm2i128 $49,%ymm1,%ymm7,%ymm1 - vperm2i128 
$49,%ymm0,%ymm2,%ymm0 - vpminsd %ymm3,%ymm12,%ymm7 - vpmaxsd %ymm11,%ymm8,%ymm2 - vpminsd %ymm9,%ymm0,%ymm10 - vpminsd %ymm6,%ymm1,%ymm5 - vpmaxsd %ymm9,%ymm0,%ymm0 - vpmaxsd %ymm3,%ymm12,%ymm3 - vpmaxsd %ymm6,%ymm1,%ymm1 - vpminsd %ymm11,%ymm8,%ymm12 - vpminsd %ymm12,%ymm7,%ymm9 - vpmaxsd %ymm12,%ymm7,%ymm6 - vpminsd %ymm10,%ymm5,%ymm8 - vpminsd %ymm2,%ymm3,%ymm7 - vpmaxsd %ymm10,%ymm5,%ymm5 - vpmaxsd %ymm2,%ymm3,%ymm3 - vpminsd %ymm0,%ymm1,%ymm2 - vpmaxsd %ymm0,%ymm1,%ymm1 - vpminsd %ymm8,%ymm9,%ymm10 - vpmaxsd %ymm5,%ymm6,%ymm0 - vpmaxsd %ymm8,%ymm9,%ymm8 - vpminsd %ymm2,%ymm7,%ymm9 - vpmaxsd %ymm2,%ymm7,%ymm7 - vpminsd %ymm5,%ymm6,%ymm2 - vpminsd %ymm1,%ymm3,%ymm5 - vpmaxsd %ymm1,%ymm3,%ymm3 - vpunpckldq %ymm9,%ymm10,%ymm11 - vpunpckhdq %ymm9,%ymm10,%ymm6 - vpunpckldq %ymm7,%ymm8,%ymm1 - vpunpckldq %ymm5,%ymm2,%ymm9 - vpunpckldq %ymm3,%ymm0,%ymm10 - vpunpckhdq %ymm5,%ymm2,%ymm2 - vpunpckhdq %ymm3,%ymm0,%ymm0 - vpunpckhdq %ymm7,%ymm8,%ymm5 - vpunpcklqdq %ymm9,%ymm11,%ymm3 - vpunpcklqdq %ymm2,%ymm6,%ymm8 - vpunpckhqdq %ymm9,%ymm11,%ymm7 - vpunpckhqdq %ymm2,%ymm6,%ymm6 - vpunpcklqdq %ymm0,%ymm5,%ymm9 - vpunpcklqdq %ymm10,%ymm1,%ymm2 - vpunpckhqdq %ymm0,%ymm5,%ymm0 - vpunpckhqdq %ymm10,%ymm1,%ymm1 - vperm2i128 $32,%ymm2,%ymm3,%ymm12 - vperm2i128 $32,%ymm1,%ymm7,%ymm11 - vperm2i128 $32,%ymm0,%ymm6,%ymm5 - vperm2i128 $49,%ymm2,%ymm3,%ymm3 - vperm2i128 $32,%ymm9,%ymm8,%ymm10 - vperm2i128 $49,%ymm1,%ymm7,%ymm2 - vperm2i128 $49,%ymm0,%ymm6,%ymm0 - vperm2i128 $49,%ymm9,%ymm8,%ymm1 - vmovdqu %ymm12,(%rax) - vmovdqu %ymm11,32(%rax) - vmovdqu %ymm10,64(%rax) - vmovdqu %ymm5,96(%rax) - vmovdqu %ymm3,128(%rax) - vmovdqu %ymm2,160(%rax) - vmovdqu %ymm1,192(%rax) - vmovdqu %ymm0,224(%rax) - add $256,%rax - cmp %rdx,%rax - je .L113 -.L112: vmovdqu 32(%rax),%ymm0 - vmovdqu (%rax),%ymm2 - vmovdqu 128(%rax),%ymm3 - vpunpckhdq %ymm0,%ymm2,%ymm5 - vpunpckldq %ymm0,%ymm2,%ymm7 - vmovdqu 96(%rax),%ymm0 - vmovdqu 64(%rax),%ymm2 - vmovdqu 224(%rax),%ymm9 - vpunpckldq %ymm0,%ymm2,%ymm6 - 
vpunpckhdq %ymm0,%ymm2,%ymm2 - vmovdqu 160(%rax),%ymm0 - mov -164(%rbp),%ebx - vpunpckldq %ymm0,%ymm3,%ymm1 - vpunpckhdq %ymm0,%ymm3,%ymm0 - vmovdqu 192(%rax),%ymm3 - vpunpcklqdq %ymm6,%ymm7,%ymm10 - vpunpckldq %ymm9,%ymm3,%ymm8 - vpunpckhdq %ymm9,%ymm3,%ymm3 - vpunpckhqdq %ymm6,%ymm7,%ymm7 - vpunpcklqdq %ymm2,%ymm5,%ymm9 - vpunpcklqdq %ymm8,%ymm1,%ymm6 - vpunpckhqdq %ymm2,%ymm5,%ymm2 - vpunpckhqdq %ymm8,%ymm1,%ymm1 - vpunpcklqdq %ymm3,%ymm0,%ymm5 - vpunpckhqdq %ymm3,%ymm0,%ymm0 - test %ebx,%ebx - jne .L199 - vpxor %ymm4,%ymm10,%ymm10 - vpxor %ymm4,%ymm9,%ymm9 - vpxor %ymm4,%ymm6,%ymm6 - vpxor %ymm4,%ymm5,%ymm5 - jmp .L111 -.L91: mov -112(%rbp),%rdx - cmp %rdx,%r11 - je .L90 - mov %r11,%rax -.L93: vpxor 32(%rax),%ymm11,%ymm2 - vpxor (%rax),%ymm11,%ymm1 - add $64,%rax - vperm2i128 $32,%ymm2,%ymm1,%ymm0 - vperm2i128 $49,%ymm2,%ymm1,%ymm1 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vperm2i128 $32,%ymm0,%ymm2,%ymm1 - vperm2i128 $49,%ymm0,%ymm2,%ymm0 - vmovdqu %ymm1,-64(%rax) - vmovdqu %ymm0,-32(%rax) - cmp %rax,%rdx - jne .L93 - jmp .L90 -.L101: test %rbx,%rbx - jle .L104 - mov %r11,%rax -.L105: vmovdqu 32(%rax),%ymm1 - vmovdqu (%rax),%ymm0 - add $64,%rax - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm2,-64(%rax) - vmovdqu %ymm0,-32(%rax) - cmp %rax,-128(%rbp) - jne .L105 - jmp .L104 -.L198: mov %r11,%rax - cmp -112(%rbp),%r11 - je .L90 -.L87: vpxor 32(%rax),%ymm12,%ymm0 - vpxor (%rax),%ymm12,%ymm1 - vmovdqu %ymm0,32(%rax) - vmovdqu %ymm1,(%rax) - add $64,%rax - cmp %rax,-112(%rbp) - jne .L87 - jmp .L90 -.L113: cmpb $0,-152(%rbp) - mov -80(%rbp),%r13 - je .L109 - mov %r15,-112(%rbp) - mov -120(%rbp),%r15 -.L145: mov -80(%rbp),%rdx - sar $2,%rdx - cmpq $0,-144(%rbp) - jle .L114 - lea (%rdx,%rdx),%rdi - lea 0(,%rdx,8),%r14 - lea (%rdi,%rdx),%rcx - lea 0(,%rdx,4),%rsi - mov %r14,%r8 - lea -1(%rdx),%r13 - lea (%rsi,%rdx),%r10 - lea (%rcx,%rcx),%r9 - sub %rdx,%r8 - xor %r12d,%r12d - and $-8,%r13 - .p2align 4,,10 - .p2align 3 -.L115: lea 
(%rdx,%r12),%rax - cmp %rax,%r12 - jge .L118 - lea 0(%r13,%r12),%rbx - lea (%r11,%r12,4),%rax - lea (%r15,%rbx,4),%rbx - .p2align 4,,10 - .p2align 3 -.L116: vmovdqu (%rax,%rsi,4),%ymm0 - vmovdqu (%rax),%ymm6 - vmovdqu (%rax,%rdx,4),%ymm1 - vpminsd %ymm0,%ymm6,%ymm7 - vpmaxsd %ymm0,%ymm6,%ymm6 - vmovdqu (%rax,%r10,4),%ymm0 - vmovdqu (%rax,%r8,4),%ymm8 - vpmaxsd %ymm0,%ymm1,%ymm3 - vpminsd %ymm0,%ymm1,%ymm2 - vmovdqu (%rax,%r9,4),%ymm1 - vmovdqu (%rax,%rdi,4),%ymm0 - vpminsd %ymm1,%ymm0,%ymm4 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu (%rax,%rcx,4),%ymm1 - vpminsd %ymm8,%ymm1,%ymm5 - vpmaxsd %ymm8,%ymm1,%ymm1 - vpminsd %ymm4,%ymm7,%ymm8 - vpmaxsd %ymm4,%ymm7,%ymm4 - vpminsd %ymm5,%ymm2,%ymm7 - vpmaxsd %ymm5,%ymm2,%ymm2 - vpminsd %ymm0,%ymm6,%ymm5 - vpmaxsd %ymm0,%ymm6,%ymm0 - vpminsd %ymm1,%ymm3,%ymm6 - vpmaxsd %ymm1,%ymm3,%ymm1 - vpminsd %ymm7,%ymm8,%ymm9 - vpmaxsd %ymm7,%ymm8,%ymm3 - vpminsd %ymm2,%ymm4,%ymm8 - vpminsd %ymm6,%ymm5,%ymm7 - vpmaxsd %ymm2,%ymm4,%ymm2 - vpmaxsd %ymm6,%ymm5,%ymm5 - vpminsd %ymm1,%ymm0,%ymm4 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm9,(%rax) - vmovdqu %ymm3,(%rax,%rdx,4) - vmovdqu %ymm8,(%rax,%rdi,4) - vmovdqu %ymm2,(%rax,%rcx,4) - vmovdqu %ymm7,(%rax,%rsi,4) - vmovdqu %ymm5,(%rax,%r10,4) - vmovdqu %ymm4,(%rax,%r9,4) - vmovdqu %ymm0,(%rax,%r8,4) - add $32,%rax - cmp %rbx,%rax - jne .L116 -.L118: add %r14,%r12 - cmp %r12,-144(%rbp) - jg .L115 -.L114: sarq $3,-80(%rbp) - mov -80(%rbp),%rax - cmp $127,%rax - jg .L145 - cmp $32,%rax - je .L145 - mov -112(%rbp),%r15 - mov %rax,%r13 -.L109: cmp $15,%r13 - jle .L119 - mov -144(%rbp),%r10 - mov -120(%rbp),%r12 -.L125: mov %r13,%rdx - sar %rdx - test %r10,%r10 - jle .L120 - lea (%rdx,%rdx),%rcx - lea -1(%rdx),%r9 - lea (%rcx,%rdx),%rsi - lea 0(,%rdx,4),%rbx - xor %r8d,%r8d - and $-8,%r9 - .p2align 4,,10 - .p2align 3 -.L121: lea (%rdx,%r8),%rax - cmp %rax,%r8 - jge .L124 - lea (%r9,%r8),%rdi - lea (%r11,%r8,4),%rax - lea (%r12,%rdi,4),%rdi - .p2align 4,,10 - .p2align 3 -.L122: vmovdqu 
(%rax,%rcx,4),%ymm1 - vmovdqu (%rax),%ymm0 - vmovdqu (%rax,%rsi,4),%ymm4 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu (%rax,%rdx,4),%ymm1 - vpminsd %ymm4,%ymm1,%ymm3 - vpmaxsd %ymm4,%ymm1,%ymm1 - vpminsd %ymm3,%ymm2,%ymm4 - vpmaxsd %ymm3,%ymm2,%ymm2 - vpminsd %ymm1,%ymm0,%ymm3 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm4,(%rax) - vmovdqu %ymm2,(%rax,%rdx,4) - vmovdqu %ymm3,(%rax,%rcx,4) - vmovdqu %ymm0,(%rax,%rsi,4) - add $32,%rax - cmp %rax,%rdi - jne .L122 -.L124: add %rbx,%r8 - cmp %r8,%r10 - jg .L121 -.L120: sar $2,%r13 - cmp $15,%r13 - jg .L125 - mov %r13,-80(%rbp) -.L119: cmpq $8,-80(%rbp) - je .L126 -.L129: test %r15,%r15 - jle .L192 - lea (%r15,%r15),%rsi - lea (%rsi,%r15),%rdx - lea 0(,%r15,4),%rcx - lea 0(,%r15,8),%rax - mov -208(%rbp),%r9 - lea (%rcx,%r15),%r8 - lea (%rdx,%rdx),%rdi - sub %r15,%rax - vpcmpeqd %ymm6,%ymm6,%ymm6 -.L132: vmovdqu (%r9,%r15,4),%ymm1 - vmovdqu (%r9),%ymm0 - vmovdqu (%r9,%r8,4),%ymm8 - vpmaxsd %ymm0,%ymm1,%ymm4 - vpminsd %ymm0,%ymm1,%ymm5 - vmovdqu (%r9,%rdx,4),%ymm0 - vmovdqu (%r9,%rsi,4),%ymm1 - vmovdqu (%r9,%rdi,4),%ymm7 - vpminsd %ymm1,%ymm0,%ymm3 - vpmaxsd %ymm1,%ymm0,%ymm1 - vmovdqu (%r9,%rcx,4),%ymm0 - mov -164(%rbp),%r10d - vpminsd %ymm0,%ymm8,%ymm2 - vpmaxsd %ymm0,%ymm8,%ymm8 - vmovdqu (%r9,%rax,4),%ymm0 - vpminsd %ymm7,%ymm0,%ymm10 - vpmaxsd %ymm7,%ymm0,%ymm0 - vpminsd %ymm10,%ymm2,%ymm9 - vpminsd %ymm3,%ymm5,%ymm7 - vpmaxsd %ymm10,%ymm2,%ymm2 - vpmaxsd %ymm3,%ymm5,%ymm5 - vpminsd %ymm1,%ymm4,%ymm3 - vpmaxsd %ymm1,%ymm4,%ymm1 - vpminsd %ymm0,%ymm8,%ymm4 - vpmaxsd %ymm0,%ymm8,%ymm8 - vpminsd %ymm4,%ymm3,%ymm11 - vpminsd %ymm9,%ymm7,%ymm0 - vpmaxsd %ymm4,%ymm3,%ymm3 - vpmaxsd %ymm9,%ymm7,%ymm7 - vpminsd %ymm8,%ymm1,%ymm4 - vpminsd %ymm2,%ymm5,%ymm9 - vpmaxsd %ymm8,%ymm1,%ymm1 - vpmaxsd %ymm2,%ymm5,%ymm2 - vpunpckldq %ymm3,%ymm11,%ymm10 - vpunpckhdq %ymm2,%ymm9,%ymm5 - vpunpckhdq %ymm3,%ymm11,%ymm3 - vpunpckldq %ymm7,%ymm0,%ymm8 - vpunpckldq %ymm2,%ymm9,%ymm11 - vpunpckhdq %ymm7,%ymm0,%ymm0 - 
vpunpckldq %ymm1,%ymm4,%ymm9 - vpunpckhdq %ymm1,%ymm4,%ymm4 - vpunpcklqdq %ymm5,%ymm0,%ymm2 - vpunpcklqdq %ymm9,%ymm10,%ymm13 - vpunpcklqdq %ymm4,%ymm3,%ymm12 - vpunpcklqdq %ymm11,%ymm8,%ymm7 - vpunpckhqdq %ymm9,%ymm10,%ymm1 - vpunpckhqdq %ymm11,%ymm8,%ymm8 - vpunpckhqdq %ymm4,%ymm3,%ymm4 - vpunpckhqdq %ymm5,%ymm0,%ymm0 - vperm2i128 $32,%ymm12,%ymm2,%ymm10 - vperm2i128 $32,%ymm1,%ymm8,%ymm9 - vperm2i128 $32,%ymm4,%ymm0,%ymm5 - vperm2i128 $32,%ymm13,%ymm7,%ymm11 - vperm2i128 $49,%ymm13,%ymm7,%ymm3 - vperm2i128 $49,%ymm12,%ymm2,%ymm2 - vperm2i128 $49,%ymm1,%ymm8,%ymm1 - vperm2i128 $49,%ymm4,%ymm0,%ymm0 - test %r10d,%r10d - je .L131 - vpxor %ymm6,%ymm11,%ymm11 - vpxor %ymm6,%ymm10,%ymm10 - vpxor %ymm6,%ymm9,%ymm9 - vpxor %ymm6,%ymm5,%ymm5 - vpxor %ymm6,%ymm3,%ymm3 - vpxor %ymm6,%ymm2,%ymm2 - vpxor %ymm6,%ymm1,%ymm1 - vpxor %ymm6,%ymm0,%ymm0 -.L131: vmovdqu %ymm11,(%r9) - vmovdqu %ymm3,(%r9,%r15,4) - vmovdqu %ymm10,(%r9,%rsi,4) - vmovdqu %ymm2,(%r9,%rdx,4) - vmovdqu %ymm9,(%r9,%rcx,4) - vmovdqu %ymm1,(%r9,%r8,4) - vmovdqu %ymm5,(%r9,%rdi,4) - vmovdqu %ymm0,(%r9,%rax,4) - add $32,%r9 - cmp %r9,-160(%rbp) - jne .L132 -.L192: vzeroupper -.L190: add $200,%rsp - pop %rbx - pop %r12 - pop %r13 - pop %r14 - pop %r15 - pop %rbp - lea -16(%r13),%rsp - pop %r13 - ret -.L59: dec %rax - mov %rax,-176(%rbp) - shr $5,%rax - sal $7,%rax - lea 128(%r11,%rax),%rax - mov %rax,-184(%rbp) - vpcmpeqd %ymm0,%ymm0,%ymm0 - mov %r11,%rax -.L64: vpxor 64(%rax),%ymm0,%ymm1 - vpxor (%rax),%ymm0,%ymm2 - vmovdqu %ymm1,64(%rax) - vmovdqu %ymm2,(%rax) - sub $-128,%rax - cmp -184(%rbp),%rax - jne .L64 - mov -176(%rbp),%rdi - lea 64(%r11),%rsi - mov %rdi,%rax - shr $4,%rax - sal $6,%rax - add %rsi,%rax - mov %rax,-208(%rbp) - mov %rdi,%rax - shr $6,%rax - sal $8,%rax - lea 256(%r11,%rax),%rax - mov $4,%ecx - mov %r14,%r8 - mov %rsi,-192(%rbp) - mov %rax,-216(%rbp) - movq $8,-112(%rbp) - vpcmpeqd %ymm11,%ymm11,%ymm11 - mov %r10,%r14 - cmp $64,%rcx - je .L200 -.L68: cmp $32,%rcx - je .L201 - cmp $16,%rcx 
- je .L74 - cmp $8,%rcx - je .L202 -.L76: mov -112(%rbp),%rdi - xor %edx,%edx - lea (%rdi,%rdi),%rax - cmp %r15,%rax - mov %rax,-152(%rbp) - setne %al - movzbl %al,%eax - mov %eax,-160(%rbp) - lea -1(%rdi),%rax - sete %dl - and $-8,%rax - movq $0,-136(%rbp) - mov %rax,-128(%rbp) - mov %rdi,%r10 - test %r15,%r15 - jle .L73 -.L78: mov -112(%rbp),%rax - mov -136(%rbp),%rdi - add %r10,%rax - cmp %rdi,%rax - jle .L81 - mov %rdi,%rsi - .p2align 4,,10 - .p2align 3 -.L84: mov %rsi,%rcx - mov %rsi,%rdi - add -112(%rbp),%rsi - cmp %rsi,%rcx - jge .L83 - lea (%r11,%rcx,4),%rax - mov %rax,-80(%rbp) - mov -120(%rbp),%rax - add -128(%rbp),%rcx - lea (%rax,%rcx,4),%rcx - mov -80(%rbp),%rax - .p2align 4,,10 - .p2align 3 -.L80: vmovdqu (%rax),%ymm0 - vmovdqu (%rax,%r15,4),%ymm15 - vmovdqu (%rax,%r13,4),%ymm7 - vpminsd %ymm0,%ymm15,%ymm6 - vpmaxsd %ymm0,%ymm15,%ymm15 - vmovdqu (%rax,%r12,4),%ymm0 - vmovdqu (%rax,%r14,4),%ymm5 - vpminsd %ymm0,%ymm7,%ymm1 - vpmaxsd %ymm0,%ymm7,%ymm7 - vmovdqu (%rax,%rbx,4),%ymm0 - vmovdqu (%rax,%r9,4),%ymm4 - vpminsd %ymm0,%ymm5,%ymm9 - vpmaxsd %ymm0,%ymm5,%ymm5 - vmovdqu (%rax,%r8,4),%ymm0 - vpminsd %ymm1,%ymm6,%ymm8 - vpminsd %ymm0,%ymm4,%ymm3 - vpmaxsd %ymm0,%ymm4,%ymm4 - vpminsd %ymm3,%ymm9,%ymm2 - vpmaxsd %ymm4,%ymm5,%ymm0 - vpmaxsd %ymm3,%ymm9,%ymm3 - vpmaxsd %ymm1,%ymm6,%ymm6 - vpminsd %ymm7,%ymm15,%ymm1 - vpmaxsd %ymm7,%ymm15,%ymm15 - vpminsd %ymm4,%ymm5,%ymm7 - vpminsd %ymm2,%ymm8,%ymm14 - vpminsd %ymm7,%ymm1,%ymm13 - vpminsd %ymm3,%ymm6,%ymm12 - vpminsd %ymm0,%ymm15,%ymm10 - vpmaxsd %ymm3,%ymm6,%ymm6 - vpmaxsd %ymm2,%ymm8,%ymm2 - vpmaxsd %ymm7,%ymm1,%ymm1 - vpmaxsd %ymm0,%ymm15,%ymm0 - vmovdqa %ymm6,-80(%rbp) - vmovdqa %ymm6,%ymm3 - vmovdqa %ymm14,%ymm9 - vmovdqa %ymm2,%ymm5 - vmovdqa %ymm13,%ymm8 - vmovdqa %ymm1,%ymm4 - vmovdqa %ymm12,%ymm7 - vmovdqa %ymm10,%ymm6 - vmovdqa %ymm0,%ymm15 - test %edx,%edx - je .L79 - vpxor -80(%rbp),%ymm11,%ymm3 - vpxor %ymm14,%ymm11,%ymm9 - vpxor %ymm13,%ymm11,%ymm8 - vpxor %ymm12,%ymm11,%ymm7 - vpxor 
%ymm10,%ymm11,%ymm6 - vpxor %ymm2,%ymm11,%ymm5 - vpxor %ymm1,%ymm11,%ymm4 - vpxor %ymm0,%ymm11,%ymm15 -.L79: vmovdqu %ymm9,(%rax) - vmovdqu %ymm8,(%rax,%r15,4) - vmovdqu %ymm7,(%rax,%r12,4) - vmovdqu %ymm6,(%rax,%r13,4) - vmovdqu %ymm5,(%rax,%rbx,4) - vmovdqu %ymm4,(%rax,%r14,4) - vmovdqu %ymm3,(%rax,%r8,4) - vmovdqu %ymm15,(%rax,%r9,4) - add $32,%rax - cmp %rax,%rcx - jne .L80 -.L83: xor $1,%edx - cmp %rdi,%r10 - jg .L84 -.L81: mov -152(%rbp),%rdi - xor -160(%rbp),%edx - add %rdi,-136(%rbp) - add %rdi,%r10 - mov -136(%rbp),%rax - cmp %rax,%r15 - jg .L78 -.L73: mov -112(%rbp),%rax - sal $4,%rax - cmp -144(%rbp),%rax - je .L203 - mov -152(%rbp),%rax - mov %rax,%rcx - sar %rcx - cmp $254,%rax - jle .L66 - mov %r8,-136(%rbp) - mov %r9,-160(%rbp) - mov %r15,-80(%rbp) - mov -144(%rbp),%r15 - mov %rbx,-112(%rbp) - mov %r12,-128(%rbp) - mov %rcx,%rbx - mov %r11,%r12 -.L67: mov %rbx,%rdx - sar $2,%rdx - mov %r15,%rsi - mov %r12,%rdi - vzeroupper - sar $3,%rbx - call int32_threestages - cmp $127,%rbx - vpcmpeqd %ymm11,%ymm11,%ymm11 - jg .L67 - mov %rbx,%rcx - mov %r12,%r11 - mov -80(%rbp),%r15 - mov -112(%rbp),%rbx - mov -128(%rbp),%r12 - mov -136(%rbp),%r8 - mov -160(%rbp),%r9 -.L66: mov -152(%rbp),%rax - mov %rax,-112(%rbp) - cmp $64,%rcx - jne .L68 -.L200: mov -144(%rbp),%rsi - mov %r11,%rdi - vzeroupper - call int32_twostages_32 - vpcmpeqd %ymm11,%ymm11,%ymm11 -.L74: mov %r11,%rax -.L69: vmovdqu 64(%rax),%ymm1 - vmovdqu (%rax),%ymm0 - vmovdqu 96(%rax),%ymm4 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu 32(%rax),%ymm1 - sub $-128,%rax - vpminsd %ymm4,%ymm1,%ymm3 - vpmaxsd %ymm4,%ymm1,%ymm1 - vpminsd %ymm3,%ymm2,%ymm4 - vpmaxsd %ymm3,%ymm2,%ymm2 - vpminsd %ymm1,%ymm0,%ymm3 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm4,-128(%rax) - vmovdqu %ymm2,-96(%rax) - vmovdqu %ymm3,-64(%rax) - vmovdqu %ymm0,-32(%rax) - cmp -184(%rbp),%rax - jne .L69 - jmp .L76 -.L202: mov %r11,%rax -.L77: vmovdqu 32(%rax),%ymm0 - vmovdqu (%rax),%ymm1 - add $64,%rax - vpminsd 
%ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm2,-64(%rax) - vmovdqu %ymm0,-32(%rax) - cmp %rax,-208(%rbp) - jne .L77 - jmp .L76 -.L203: mov %r14,%r10 - mov %r8,%r14 - jmp .L60 -.L201: mov %r11,%rax -.L71: vmovdqu 128(%rax),%ymm0 - vmovdqu (%rax),%ymm6 - vmovdqu 32(%rax),%ymm1 - vpminsd %ymm0,%ymm6,%ymm7 - vpmaxsd %ymm0,%ymm6,%ymm6 - vmovdqu 160(%rax),%ymm0 - vmovdqu 224(%rax),%ymm8 - vpminsd %ymm0,%ymm1,%ymm5 - vpmaxsd %ymm0,%ymm1,%ymm3 - vmovdqu 192(%rax),%ymm1 - vmovdqu 64(%rax),%ymm0 - add $256,%rax - vpminsd %ymm1,%ymm0,%ymm4 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu -160(%rax),%ymm1 - vpminsd %ymm8,%ymm1,%ymm2 - vpmaxsd %ymm8,%ymm1,%ymm1 - vpminsd %ymm4,%ymm7,%ymm8 - vpmaxsd %ymm4,%ymm7,%ymm4 - vpminsd %ymm2,%ymm5,%ymm7 - vpmaxsd %ymm2,%ymm5,%ymm2 - vpminsd %ymm0,%ymm6,%ymm5 - vpmaxsd %ymm0,%ymm6,%ymm0 - vpminsd %ymm1,%ymm3,%ymm6 - vpmaxsd %ymm1,%ymm3,%ymm1 - vpminsd %ymm7,%ymm8,%ymm9 - vpmaxsd %ymm7,%ymm8,%ymm3 - vpminsd %ymm2,%ymm4,%ymm8 - vpminsd %ymm6,%ymm5,%ymm7 - vpmaxsd %ymm2,%ymm4,%ymm2 - vpmaxsd %ymm6,%ymm5,%ymm5 - vpminsd %ymm1,%ymm0,%ymm4 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm9,-256(%rax) - vmovdqu %ymm3,-224(%rax) - vmovdqu %ymm8,-192(%rax) - vmovdqu %ymm2,-160(%rax) - vmovdqu %ymm7,-128(%rax) - vmovdqu %ymm5,-96(%rax) - vmovdqu %ymm4,-64(%rax) - vmovdqu %ymm0,-32(%rax) - cmp %rax,-216(%rbp) - jne .L71 - jmp .L76 -.L194: mov 4(%rdi),%eax - mov 12(%rdi),%ebx - cmp (%rdi),%eax - cmovg (%rdi),%ecx - cmovg %eax,%r9d - cmp 8(%rdi),%ebx - cmovg 8(%rdi),%eax - cmovg %ebx,%edi - cmp %ecx,%eax - cmovg %ecx,%r8d - cmovg %eax,%eax - cmp %r9d,%edi - cmovg %r9d,%edx - cmovg %edi,%r9d - cmp %eax,%edx - cmovg %eax,%r12d - cmovg %edx,%r10d - mov 20(%r11),%eax - cmp 16(%r11),%eax - cmovg 16(%r11),%esi - cmovg %eax,%ecx - mov 28(%r11),%eax - cmp 24(%r11),%eax - cmovg 24(%r11),%edx - cmovg %eax,%edi - cmp %ecx,%edi - cmovg %ecx,%eax - cmovg %edi,%edi - cmp %esi,%edx - cmovg %esi,%ecx - cmovg %edx,%edx - cmp %r9d,%edi - cmovg %r9d,%ebx - cmovg %edi,%edi 
- cmp %edx,%eax - cmovg %edx,%esi - cmovg %eax,%edx - mov %edi,(%r11) - cmp %r12d,%esi - cmovg %r12d,%r9d - cmovg %esi,%esi - cmp %r10d,%edx - cmovg %r10d,%eax - cmovg %edx,%edx - cmp %esi,%ebx - cmovg %esi,%r13d - cmovg %ebx,%ebx - cmp %r8d,%ecx - cmovg %r8d,%esi - cmovg %ecx,%ecx - cmp %ecx,%eax - cmovg %ecx,%r8d - cmovg %eax,%eax - mov %esi,28(%r11) - cmp %edx,%ebx - cmovg %edx,%r12d - cmovg %ebx,%ecx - cmp %eax,%r13d - cmovg %eax,%ebx - cmovg %r13d,%edx - mov %ecx,4(%r11) - cmp %r8d,%r9d - cmovg %r8d,%r10d - cmovg %r9d,%eax - mov %r12d,8(%r11) - mov %edx,12(%r11) - mov %ebx,16(%r11) - mov %eax,20(%r11) - mov %r10d,24(%r11) - jmp .L190 -.L126: cmpq $0,-144(%rbp) - jle .L129 - mov %r11,%rax -.L130: vmovdqu 32(%rax),%ymm1 - vmovdqu (%rax),%ymm0 - add $64,%rax - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm2,-64(%rax) - vmovdqu %ymm0,-32(%rax) - cmp %rax,-128(%rbp) - jne .L130 - jmp .L129 -.L195: vmovdqa .LC0(%rip),%ymm1 - vmovdqa .LC1(%rip),%ymm3 - vpxor 32(%rdi),%ymm1,%ymm2 - vpxor (%rdi),%ymm1,%ymm1 - mov -164(%rbp),%r14d - vpunpckldq %ymm2,%ymm1,%ymm0 - vpunpckhdq %ymm2,%ymm1,%ymm1 - vpunpcklqdq %ymm1,%ymm0,%ymm2 - vpunpckhqdq %ymm1,%ymm0,%ymm0 - vpminsd %ymm2,%ymm0,%ymm1 - vpmaxsd %ymm2,%ymm0,%ymm0 - vpxor %ymm3,%ymm1,%ymm1 - vpxor %ymm3,%ymm0,%ymm0 - vpunpckldq %ymm0,%ymm1,%ymm4 - vpunpckhdq %ymm0,%ymm1,%ymm0 - vpmaxsd %ymm0,%ymm4,%ymm1 - vpminsd %ymm0,%ymm4,%ymm2 - vpunpcklqdq %ymm1,%ymm2,%ymm0 - vpunpckhqdq %ymm1,%ymm2,%ymm2 - vpunpckldq %ymm2,%ymm0,%ymm1 - vpunpckhdq %ymm2,%ymm0,%ymm0 - vpunpcklqdq %ymm0,%ymm1,%ymm4 - vpunpckhqdq %ymm0,%ymm1,%ymm1 - vpminsd %ymm4,%ymm1,%ymm2 - vpmaxsd %ymm4,%ymm1,%ymm1 - vpunpckldq %ymm1,%ymm2,%ymm0 - vpunpckhdq %ymm1,%ymm2,%ymm1 - vpxor %ymm3,%ymm1,%ymm1 - vpxor %ymm3,%ymm0,%ymm0 - vperm2i128 $32,%ymm1,%ymm0,%ymm2 - vperm2i128 $49,%ymm1,%ymm0,%ymm0 - vpminsd %ymm2,%ymm0,%ymm1 - vpmaxsd %ymm2,%ymm0,%ymm0 - vperm2i128 $32,%ymm0,%ymm1,%ymm3 - vperm2i128 $49,%ymm0,%ymm1,%ymm0 - vpminsd %ymm3,%ymm0,%ymm2 
- vpmaxsd %ymm3,%ymm0,%ymm0 - vpunpcklqdq %ymm0,%ymm2,%ymm1 - vpunpckhqdq %ymm0,%ymm2,%ymm2 - vpunpckldq %ymm2,%ymm1,%ymm0 - vpunpckhdq %ymm2,%ymm1,%ymm1 - vpunpcklqdq %ymm1,%ymm0,%ymm3 - vpunpckhqdq %ymm1,%ymm0,%ymm0 - vpminsd %ymm3,%ymm0,%ymm2 - vpmaxsd %ymm3,%ymm0,%ymm0 - vpunpckldq %ymm0,%ymm2,%ymm1 - vpunpckhdq %ymm0,%ymm2,%ymm0 - vpunpcklqdq %ymm0,%ymm1,%ymm2 - vpunpckhqdq %ymm0,%ymm1,%ymm1 - vpcmpeqd %ymm0,%ymm0,%ymm0 - test %r14d,%r14d - je .L54 - vpxor %ymm0,%ymm1,%ymm1 - mov %edx,%esi -.L55: vmovdqa %ymm2,%ymm0 - mov %r11,%rdi -.L193: add $200,%rsp - pop %rbx - pop %r12 - pop %r13 - pop %r14 - pop %r15 - pop %rbp - lea -16(%r13),%rsp - pop %r13 - jmp merge16_finish -.L197: lea -1(%r15),%rax - mov %rax,-200(%rbp) - lea 32(%rdi),%rax - mov %rax,-120(%rbp) - jmp .L62 -.L196: mov $1,%edx - mov $16,%esi - mov %rdi,-80(%rbp) - call djbsort$avx2_2power - mov -80(%rbp),%r11 - xor %edx,%edx - lea 64(%r11),%r12 - mov $16,%esi - mov %r12,%rdi - call djbsort$avx2_2power - mov -80(%rbp),%r11 - mov -164(%rbp),%r13d - vmovdqu (%r11),%ymm4 - vmovdqu 32(%r11),%ymm1 - vmovdqu 64(%r11),%ymm2 - vmovdqu 96(%r11),%ymm3 - test %r13d,%r13d - je .L57 - vpcmpeqd %ymm0,%ymm0,%ymm0 - vpxor %ymm0,%ymm4,%ymm4 - vpxor %ymm0,%ymm1,%ymm1 - vpxor %ymm0,%ymm2,%ymm2 - vpxor %ymm0,%ymm3,%ymm3 -.L57: mov -164(%rbp),%esi - vpmaxsd %ymm1,%ymm3,%ymm5 - vpminsd %ymm4,%ymm2,%ymm0 - vpminsd %ymm1,%ymm3,%ymm1 - vpmaxsd %ymm4,%ymm2,%ymm4 - mov %r11,%rdi - vmovdqa %ymm4,-112(%rbp) - vmovdqa %ymm5,-80(%rbp) - call merge16_finish - vmovdqa -80(%rbp),%ymm5 - vmovdqa -112(%rbp),%ymm4 - vmovdqa %ymm5,%ymm1 - vmovdqa %ymm4,%ymm0 - mov %r12,%rdi - jmp .L193 -.L54: vpxor %ymm0,%ymm2,%ymm2 - mov %edx,%esi - jmp .L55 - .endfn djbsort$avx2_2power,globl - - .p2align 4 +/ D.J. Bernstein's outrageously fast integer sorting algorithm. 
+/ +/ @param rdi is int32 array +/ @param rsi is number of elements in rdi +/ @note public domain +/ @see en.wikipedia.org/wiki/Sorting_network djbsort$avx2: push %rbp mov %rsp,%rbp push %r15 - mov %rdi,%r15 push %r14 - mov %rsi,%r14 push %r13 + mov %rsi,%r13 push %r12 + mov %rdi,%r12 push %rbx - and $-32,%rsp + andq $-32,%rsp sub $1056,%rsp cmp $8,%rsi - jle .L265 - blsr %rsi,%rax - je .L220 - lea -8(%rsi),%rax - mov %rax,8(%rsp) + jg .L148 + jne .L149 + mov (%rdi),%eax + mov 4(%rdi),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,(%rdi) + mov 8(%rdi),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,4(%rdi) + mov 12(%rdi),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,8(%rdi) + mov 16(%rdi),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,12(%rdi) + mov 20(%rdi),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,16(%rdi) + mov 24(%rdi),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,20(%rdi) + mov 28(%rdi),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,24(%rdi) + mov %edx,28(%rdi) + jmp .L150 +.L149: cmp $7,%rsi + jne .L151 +.L150: mov (%r12),%edx + mov 4(%r12),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,(%r12) + mov 8(%r12),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,4(%r12) + mov 12(%r12),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,8(%r12) + mov 16(%r12),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,12(%r12) + mov 20(%r12),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,16(%r12) + mov 24(%r12),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,20(%r12) + mov %edx,24(%r12) + jmp .L152 +.L151: cmp 
$6,%rsi + jne .L153 +.L152: mov (%r12),%eax + mov 4(%r12),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,(%r12) + mov 8(%r12),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,4(%r12) + mov 12(%r12),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,8(%r12) + mov 16(%r12),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,12(%r12) + mov 20(%r12),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,16(%r12) + mov %edx,20(%r12) + jmp .L154 +.L153: cmp $5,%rsi + jne .L155 +.L154: mov (%r12),%edx + mov 4(%r12),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,(%r12) + mov 8(%r12),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,4(%r12) + mov 12(%r12),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,8(%r12) + mov 16(%r12),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,12(%r12) + mov %edx,16(%r12) + jmp .L156 +.L155: cmp $4,%rsi + jne .L157 +.L156: mov (%r12),%eax + mov 4(%r12),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,(%r12) + mov 8(%r12),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,4(%r12) + mov 12(%r12),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,8(%r12) + mov %edx,12(%r12) + jmp .L158 +.L157: cmp $3,%rsi + jne .L159 +.L158: mov (%r12),%edx + mov 4(%r12),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,(%r12) + mov 8(%r12),%edx + cmp %edx,%eax + mov %eax,%ecx + cmovg %edx,%eax + cmovg %ecx,%edx + mov %eax,4(%r12) + mov %edx,8(%r12) + jmp .L160 +.L159: cmp $2,%rsi + jne .L147 +.L160: mov (%r12),%edx + mov 4(%r12),%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,(%r12) + mov 
%eax,4(%r12) + jmp .L147 +.L148: lea -1(%rsi),%rax mov $8,%ebx - cmp $8,%rax - jle .L266 - .p2align 4,,10 - .p2align 3 -.L221: mov %rbx,%rax - mov %r14,%r12 - add %rbx,%rbx - sub %rbx,%r12 - cmp %rbx,%r12 - jg .L221 - cmp $128,%rbx - jle .L267 - mov $1,%edx - mov %rbx,%rsi - mov %r15,%rdi - call djbsort$avx2_2power - lea (%r15,%rbx,4),%rdi - mov %r12,%rsi - call djbsort$avx2 - lea 32(%r15),%rax - mov %rax,16(%rsp) - jmp .L230 - .p2align 4,,10 - .p2align 3 -.L228: lea 0(%r13,%r12),%rdx - add %r11,%r12 - sub %r9,%rdx - lea (%r15,%r12,4),%rsi - mov %r8,%rdi - sar $3,%rbx - call minmax_vector - cmp $63,%rbx - jle .L268 -.L230: mov %rbx,%r12 - sar $2,%r12 - mov %r12,%rdx - mov %r14,%rsi - mov %r15,%rdi - call int32_threestages - lea 0(,%r12,4),%rcx - mov %r14,%rdx - sub %rcx,%rdx - lea (%rcx,%rax),%r13 - lea (%r15,%rax,4),%r8 - lea (%r15,%r13,4),%rsi - sub %rax,%rdx - mov %r8,%rdi - mov %rsi,24(%rsp) - mov %rax,%r9 - mov %rax,%r11 - call minmax_vector - cmp %r14,%r13 - mov 24(%rsp),%rsi - lea (%r12,%r12),%r10 - jle .L269 -.L226: mov %r14,%r13 - sub %r10,%r13 - lea (%r11,%r12,2),%rax - mov %r13,%rdx - sub %r9,%rdx - lea (%r15,%rax,4),%rsi - mov %r8,%rdi - call minmax_vector - add %r9,%r10 - cmp %r14,%r10 - jg .L228 - mov %r10,%rax - sub %r12,%rax - mov %r10,%r11 - lea (%r15,%r10,4),%r8 - cmp %rax,%r9 - jge .L247 - sub %r9,%rax - dec %rax - and $-8,%rax - lea (%r15,%r9,4),%rdx - add %rax,%r9 - mov 16(%rsp),%rax - lea (%rax,%r9,4),%rax - .p2align 4,,10 - .p2align 3 -.L229: vmovdqu (%rdx,%r12,4),%ymm0 - vmovdqu (%rdx),%ymm1 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm2,(%rdx) - vmovdqu %ymm0,(%rdx,%r12,4) - add $32,%rdx - cmp %rdx,%rax - jne .L229 - mov %r10,%r9 - vzeroupper - jmp .L228 - .p2align 4,,10 - .p2align 3 -.L267: mov %rbx,%rdx - sar $2,%rdx - sar $3,%rbx - lea 0(,%rax,4),%r12 - lea 32(%rsp),%r13 - cmp %rbx,%rdx - jle .L224 - vmovdqa .LC4(%rip),%ymm0 - .p2align 4,,10 - .p2align 3 -.L225: mov %rbx,%rax - sal $5,%rax - inc %rbx - vmovdqa 
%ymm0,0(%r13,%rax) - cmp %rdx,%rbx - jl .L225 - vzeroupper -.L224: sal $2,%r14 - mov %r14,%rdx - mov %r15,%rsi - mov %r13,%rdi - call memcpy + test %rsi,%rax + jne .L162 xor %edx,%edx - mov %r12,%rsi - mov %r13,%rdi - call djbsort$avx2_2power - mov %r14,%rdx + call int32_sort_2power + jmp .L147 +.L162: mov %r13,%r14 + sub %rbx,%r14 + cmp %rbx,%r14 + jle .L199 + add %rbx,%rbx + jmp .L162 +.L199: cmp $128,%rbx + jg .L164 + mov %rbx,%rax + mov %rbx,%rdx + vmovdqa .LC4(%rip),%ymm0 + sar $3,%rax + sar $2,%rdx +.L165: cmp %rdx,%rax + jge .L200 + mov %rax,%rcx + incq %rax + salq $5,%rcx + vmovdqa %ymm0,32(%rsp,%rcx) + jmp .L165 +.L200: xor %eax,%eax +.L167: mov (%r12,%rax,4),%edx + mov %rax,%r14 + mov %edx,32(%rsp,%rax,4) + lea 1(%rax),%rax + cmp %rax,%r13 + jne .L167 + lea (%rbx,%rbx),%rsi + xor %edx,%edx + lea 32(%rsp),%rdi + call int32_sort_2power + xor %eax,%eax +.L168: mov 32(%rsp,%rax,4),%ecx + mov %rax,%rdx + mov %ecx,(%r12,%rax,4) + incq %rax + cmp %rdx,%r14 + jne .L168 + jmp .L147 +.L164: mov %rbx,%rsi + mov %r12,%rdi + mov $1,%edx + call int32_sort_2power + lea (%r12,%rbx,4),%rdi + mov %r14,%rsi + call djbsort$avx2 +.L175: mov %rbx,%r14 mov %r13,%rsi - mov %r15,%rdi - call memcpy -.L263: lea -40(%rbp),%rsp - pop %rbx - pop %r12 - pop %r13 - pop %r14 - pop %r15 - pop %rbp - ret - .p2align 4,,10 - .p2align 3 -.L269: lea (%r12,%r9),%rax - cmp %rax,%r9 - jge .L246 - notq %r11 - add %r11,%rax - and $-8,%rax - add %rax,%r9 - mov 16(%rsp),%rax - lea (%r10,%r12),%rdx - mov %r8,%rdi - lea (%rax,%r9,4),%rax - .p2align 4,,10 - .p2align 3 -.L227: vmovdqu (%rdi),%ymm1 - vmovdqu (%rdi,%r10,4),%ymm0 - vmovdqu (%rdi,%r12,4),%ymm4 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu (%rdi,%rdx,4),%ymm1 - vpminsd %ymm4,%ymm1,%ymm3 - vpmaxsd %ymm4,%ymm1,%ymm1 + mov %r12,%rdi + sar $2,%r14 + mov %r14,%rdx + call int32_threestages + lea 0(,%r14,4),%r10 + mov %r13,%rdx + lea (%r10,%rax),%r11 + sub %r10,%rdx + lea (%r12,%rax,4),%rdi + mov %rax,%r9 + sub %rax,%rdx + lea 
(%r12,%r11,4),%rsi + call minmax_vector + lea (%r14,%r14),%rax + mov %rax,24(%rsp) + cmp %r13,%r11 + jg .L169 + imul $-8,%r14,%rax + lea (%r12,%r10),%rdx + lea (%rdx,%r10),%rcx + lea (%r14,%r9),%r15 + lea (%rcx,%r10),%rdi + add %rdi,%rax + lea (%rax,%r10),%rsi + lea (%rsi,%r10),%r8 +.L170: cmp %r9,%r15 + jle .L201 + vmovdqu (%rcx,%r9,4),%ymm7 + vmovdqu (%rdi,%r9,4),%ymm6 + vpminsd (%r12,%r9,4),%ymm7,%ymm2 + vpminsd (%rdx,%r9,4),%ymm6,%ymm3 + vpmaxsd (%r12,%r9,4),%ymm7,%ymm0 + vpmaxsd (%rdx,%r9,4),%ymm6,%ymm1 vpminsd %ymm3,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 - vmovdqu %ymm4,(%rdi) - vmovdqu %ymm2,(%rdi,%r12,4) - vmovdqu %ymm3,(%rdi,%r10,4) - vmovdqu %ymm0,(%rdi,%rdx,4) - add $32,%rdi - cmp %rdi,%rax - jne .L227 - mov %rsi,%r8 + vmovdqu %ymm4,(%r12,%r9,4) + vmovdqu %ymm2,(%rax,%r9,4) + vmovdqu %ymm3,(%rsi,%r9,4) + vmovdqu %ymm0,(%r8,%r9,4) + add $8,%r9 + jmp .L170 +.L201: mov %r11,%r9 +.L169: mov 24(%rsp),%rax + lea (%r14,%r14),%r15 mov %r13,%r11 - mov %r13,%r9 - vzeroupper - jmp .L226 - .p2align 4,,10 - .p2align 3 -.L268: cmp $32,%rbx - je .L270 - mov %r15,%r10 - cmp $16,%rbx - je .L249 - mov $32,%ebx - xor %r11d,%r11d - mov $15,%eax - xor %r9d,%r9d -.L237: cmp %rax,%r14 - jle .L239 - mov %r9,%rax - .p2align 4,,10 - .p2align 3 -.L240: vmovdqu 32(%r15,%rax,4),%ymm0 - vmovdqu (%r15,%rax,4),%ymm2 + lea (%r12,%r9,4),%rdi + sub %r15,%r11 + add %r9,%rax + mov %r11,%rdx + lea (%r12,%rax,4),%rsi + sub %r9,%rdx + call minmax_vector + lea (%r15,%r9),%rax + cmp %r13,%rax + jg .L172 mov %rax,%rdx - vpminsd %ymm0,%ymm2,%ymm1 - vpmaxsd %ymm0,%ymm2,%ymm2 - vperm2i128 $32,%ymm2,%ymm1,%ymm0 - vperm2i128 $49,%ymm2,%ymm1,%ymm1 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vperm2i128 $32,%ymm0,%ymm2,%ymm1 - vperm2i128 $49,%ymm0,%ymm2,%ymm2 - vpunpcklqdq %ymm2,%ymm1,%ymm0 - vpunpckhqdq %ymm2,%ymm1,%ymm1 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vpunpckldq %ymm0,%ymm2,%ymm1 - vpunpckhdq %ymm0,%ymm2,%ymm2 - 
vpunpcklqdq %ymm2,%ymm1,%ymm0 - vpunpckhqdq %ymm2,%ymm1,%ymm1 - vpminsd %ymm1,%ymm0,%ymm2 - vpmaxsd %ymm1,%ymm0,%ymm0 - vpunpckldq %ymm0,%ymm2,%ymm1 - add $31,%rdx - vpunpckhdq %ymm0,%ymm2,%ymm0 - vmovdqu %ymm1,(%r15,%rax,4) - vmovdqu %ymm0,32(%r15,%rax,4) - add $16,%rax - cmp %rdx,%r14 - jg .L240 - lea -16(%r14),%rax - sub %r9,%rax - lea 15(%r9),%rdx - and $-16,%rax - cmp %rdx,%r14 - mov $0,%edx - cmovle %rdx,%rax - lea 16(%r9,%rax),%r9 - mov %r9,%r11 - lea 32(,%r9,4),%rbx - lea (%r15,%r9,4),%r10 - vzeroupper -.L239: mov 8(%rsp),%rdx - lea (%r15,%rbx),%rsi + add %r12,%r10 + sub %r14,%rdx +.L173: cmp %r9,%rdx + jle .L202 + vmovdqu (%r10,%r9,4),%ymm6 + vpminsd (%r12,%r9,4),%ymm6,%ymm1 + vpmaxsd (%r12,%r9,4),%ymm6,%ymm0 + vmovdqu %ymm1,(%r12,%r9,4) + vmovdqu %ymm0,(%r10,%r9,4) + add $8,%r9 + jmp .L173 +.L202: mov %rax,%r9 +.L172: lea (%r11,%r14),%rdx + add %r9,%r14 + lea (%r12,%r9,4),%rdi + sar $3,%rbx sub %r9,%rdx - mov %r10,%rdi + lea (%r12,%r14,4),%rsi call minmax_vector - lea 16(,%r11,4),%rax - lea 7(%r9),%rdx - lea (%r15,%rax),%rsi - cmp %r14,%rdx - jge .L241 - mov (%r10),%ebx - cmp (%rsi),%ebx - cmovg (%rsi),%ecx - cmovg %ebx,%edx - mov %ecx,(%r10) - mov %edx,(%rsi) - lea -12(%r15,%rax),%rbx - lea 4(%r15,%rax),%rdi - mov (%rbx),%edx - cmp (%rdi),%edx - cmovg (%rdi),%ecx - cmovg %edx,%edx - mov %ecx,(%rbx) - mov %edx,(%rdi) - lea -8(%r15,%rax),%r11 - lea 8(%r15,%rax),%rdx - mov (%r11),%ecx - cmp (%rdx),%ecx - cmovg (%rdx),%r12d - cmovg %ecx,%ecx - mov %r12d,(%r11) - mov %ecx,(%rdx) - lea -4(%r15,%rax),%rcx - lea 12(%r15,%rax),%rax - mov (%rcx),%r13d - cmp (%rax),%r13d - cmovg (%rax),%r8d - cmovg %r13d,%r13d - mov %r8d,(%rcx) - mov %r13d,(%rax) - cmp %r12d,(%r10) - cmovg %r12d,%r13d - cmovg (%r10),%r12d - mov %r13d,(%r10) - mov %r12d,(%r11) - mov (%rbx),%r8d - cmp (%rcx),%r8d - cmovg (%rcx),%r12d - cmovg %r8d,%r13d - mov %r12d,(%rbx) - mov %r13d,(%rcx) - cmp %r12d,(%r10) - cmovg %r12d,%r13d - cmovg (%r10),%r12d - mov %r13d,(%r10) - mov %r12d,(%rbx) - mov 
(%r11),%r8d - cmp (%rcx),%r8d - cmovg (%rcx),%ebx - cmovg %r8d,%r10d - mov %ebx,(%r11) - mov %r10d,(%rcx) - lea 8(%r9),%r11 - mov (%rsi),%ecx - cmp (%rdx),%ecx - cmovg (%rdx),%r10d - cmovg %ecx,%ecx - mov %r10d,(%rsi) - mov %ecx,(%rdx) - mov (%rdi),%ebx - cmp (%rax),%ebx - cmovg (%rax),%ecx - cmovg %ebx,%r10d - mov %ecx,(%rdi) - mov %r10d,(%rax) - cmp %ecx,(%rsi) - cmovg %ecx,%r10d - cmovg (%rsi),%ecx - mov %r10d,(%rsi) - mov %ecx,(%rdi) - mov (%rdx),%ecx - cmp (%rax),%ecx - cmovg (%rax),%esi - cmovg %ecx,%ecx - mov %esi,(%rdx) - mov %ecx,(%rax) - lea 48(,%r9,4),%rax - lea (%r15,%rax),%rsi - lea -16(%r15,%rax),%r10 - mov %r11,%r9 -.L241: lea -4(%r14),%rdx - sub %r9,%rdx - mov %r10,%rdi - call minmax_vector - lea 3(%r9),%rax - cmp %r14,%rax - jge .L242 - lea 8(,%r11,4),%rax - lea (%r15,%rax),%rdx - mov (%r10),%ecx - cmp (%rdx),%ecx - cmovg (%rdx),%esi - cmovg %ecx,%ecx - mov %esi,(%r10) - mov %ecx,(%rdx) - lea -4(%r15,%rax),%rsi - lea 4(%r15,%rax),%rax - mov (%rsi),%ebx - cmp (%rax),%ebx - cmovg (%rax),%ecx - cmovg %ebx,%edi - mov %ecx,(%rsi) - mov %edi,(%rax) - cmp %ecx,(%r10) - cmovg %ecx,%edi - cmovg (%r10),%ecx - mov %edi,(%r10) - mov %ecx,(%rsi) - add $4,%r9 - mov (%rdx),%ecx - cmp (%rax),%ecx - cmovg (%rax),%esi - cmovg %ecx,%ecx - mov %esi,(%rdx) - mov %ecx,(%rax) -.L242: lea 2(%r9),%rax - cmp %r14,%rax - jge .L243 - lea 0(,%r9,4),%rax - lea (%r15,%rax),%rdx - lea 8(%r15,%rax),%rax - mov (%rdx),%ecx - cmp (%rax),%ecx - cmovg (%rax),%esi - cmovg %ecx,%ecx - mov %esi,(%rdx) - mov %ecx,(%rax) -.L243: lea 1(%r9),%rax - cmp %r14,%rax - jge .L263 - sal $2,%r9 - lea (%r15,%r9),%rdx - lea 4(%r15,%r9),%rax - mov (%rdx),%ecx - cmp (%rax),%ecx - cmovg (%rax),%esi - cmovg %ecx,%ecx - mov %esi,(%rdx) - mov %ecx,(%rax) - lea -40(%rbp),%rsp - pop %rbx - pop %r12 - pop %r13 - pop %r14 - pop %r15 - pop %rbp - ret - .p2align 4,,10 - .p2align 3 -.L265: je .L271 - cmp $7,%rsi - je .L272 - cmp $6,%rsi - je .L273 - cmp $5,%rsi - je .L274 - cmp $4,%rsi - je .L275 - cmp $3,%rsi - je 
.L276 - cmp $2,%rsi - jne .L263 - mov (%rdi),%edx - mov 4(%rdi),%ecx - jmp .L217 - .p2align 4,,10 - .p2align 3 -.L271: mov (%rdi),%ecx - cmp 4(%rdi),%ecx - cmovg 4(%rdi),%eax - cmovg %ecx,%ecx - mov %eax,%edx - cmp 8(%rdi),%ecx - cmovg 8(%rdi),%eax - cmovg %ecx,%ecx - mov %eax,%esi - cmp 12(%rdi),%ecx - cmovg 12(%rdi),%eax - cmovg %ecx,%ecx - mov %eax,%r9d - cmp 16(%rdi),%ecx - cmovg 16(%rdi),%eax - cmovg %ecx,%ecx - mov %eax,%edi - cmp 20(%r15),%ecx - cmovg 20(%r15),%eax - cmovg %ecx,%ecx - mov %eax,%r8d - cmp 24(%r15),%ecx - cmovg 24(%r15),%eax - cmovg %ecx,%ecx - mov %eax,%r10d - cmp 28(%r15),%ecx - cmovg 28(%r15),%eax - cmovg %ecx,%ecx - mov %ecx,28(%r15) - mov %eax,%r11d -.L207: cmp %esi,%edx - cmovg %esi,%ecx - cmovg %edx,%eax - cmp %r9d,%eax - cmovg %r9d,%esi - cmovg %eax,%eax - cmp %edi,%eax - cmovg %edi,%r9d - cmovg %eax,%eax - cmp %r8d,%eax - cmovg %r8d,%edi - cmovg %eax,%eax - cmp %r10d,%eax - cmovg %r10d,%r8d - cmovg %eax,%eax - cmp %r11d,%eax - cmovg %r11d,%r10d - cmovg %eax,%eax - mov %eax,24(%r15) - mov %ecx,%edx -.L209: cmp %esi,%edx - cmovg %esi,%ecx - cmovg %edx,%edx - cmp %r9d,%edx - cmovg %r9d,%esi - cmovg %edx,%eax - cmp %edi,%eax - cmovg %edi,%r9d - cmovg %eax,%eax - cmp %r8d,%eax - cmovg %r8d,%edi - cmovg %eax,%eax - cmp %r10d,%eax - cmovg %r10d,%r8d - cmovg %eax,%eax - mov %eax,20(%r15) -.L211: cmp %esi,%ecx - cmovg %esi,%edx - cmovg %ecx,%ecx - cmp %r9d,%ecx - cmovg %r9d,%esi - cmovg %ecx,%eax - mov %esi,%ecx - cmp %edi,%eax - cmovg %edi,%esi - cmovg %eax,%eax - cmp %r8d,%eax - cmovg %r8d,%edi - cmovg %eax,%eax - mov %eax,16(%r15) -.L213: cmp %ecx,%edx - cmovg %ecx,%eax - cmovg %edx,%edx - cmp %esi,%edx - cmovg %esi,%ecx - cmovg %edx,%edx - cmp %edi,%edx - cmovg %edi,%esi - cmovg %edx,%edx - mov %edx,12(%r15) -.L215: cmp %ecx,%eax - cmovg %ecx,%edx - cmovg %eax,%eax - cmp %esi,%eax - cmovg %esi,%ecx - cmovg %eax,%eax - mov %eax,8(%r15) -.L217: cmp %ecx,%edx - cmovg %ecx,%eax - cmovg %edx,%edx - mov %eax,(%r15) - mov %edx,4(%r15) - lea 
-40(%rbp),%rsp - pop %rbx - pop %r12 - pop %r13 - pop %r14 - pop %r15 - pop %rbp - ret -.L249: mov $64,%r12d - mov $32,%ebx - xor %r11d,%r11d - mov $31,%r8d - xor %r9d,%r9d -.L236: lea (%r15,%r9,4),%rax - mov %r9,%rcx - cmp %r8,%r14 - jle .L235 - .p2align 4,,10 - .p2align 3 -.L238: vmovdqu 64(%rax),%ymm1 - vmovdqu 96(%rax),%ymm3 - vmovdqu (%rax),%ymm0 - vmovdqu 32(%rax),%ymm2 - vpminsd %ymm1,%ymm0,%ymm5 - vpmaxsd %ymm1,%ymm0,%ymm0 - vpminsd %ymm3,%ymm2,%ymm1 - vpmaxsd %ymm3,%ymm2,%ymm2 - vpminsd %ymm2,%ymm0,%ymm4 - vpminsd %ymm1,%ymm5,%ymm3 - vpmaxsd %ymm2,%ymm0,%ymm0 - vpmaxsd %ymm1,%ymm5,%ymm5 - vperm2i128 $32,%ymm0,%ymm4,%ymm2 - vperm2i128 $32,%ymm5,%ymm3,%ymm1 - vperm2i128 $49,%ymm0,%ymm4,%ymm0 - vperm2i128 $49,%ymm5,%ymm3,%ymm3 - vpminsd %ymm3,%ymm1,%ymm5 - vpminsd %ymm0,%ymm2,%ymm4 - vpmaxsd %ymm3,%ymm1,%ymm1 - vpmaxsd %ymm0,%ymm2,%ymm0 - vperm2i128 $32,%ymm1,%ymm5,%ymm3 - vperm2i128 $32,%ymm0,%ymm4,%ymm2 - vperm2i128 $49,%ymm1,%ymm5,%ymm5 - vperm2i128 $49,%ymm0,%ymm4,%ymm4 - vpunpcklqdq %ymm5,%ymm3,%ymm1 - vpunpcklqdq %ymm4,%ymm2,%ymm0 - vpunpckhqdq %ymm5,%ymm3,%ymm3 - vpunpckhqdq %ymm4,%ymm2,%ymm2 - vpminsd %ymm3,%ymm1,%ymm5 - vpminsd %ymm2,%ymm0,%ymm4 - vpmaxsd %ymm3,%ymm1,%ymm1 - vpmaxsd %ymm2,%ymm0,%ymm0 - vpunpckldq %ymm1,%ymm5,%ymm3 - vpunpckldq %ymm0,%ymm4,%ymm2 - vpunpckhdq %ymm1,%ymm5,%ymm5 - vpunpckhdq %ymm0,%ymm4,%ymm4 - vpunpcklqdq %ymm5,%ymm3,%ymm1 - vpunpcklqdq %ymm4,%ymm2,%ymm0 - vpunpckhqdq %ymm5,%ymm3,%ymm3 - vpunpckhqdq %ymm4,%ymm2,%ymm2 - mov %rcx,%rdx - vpminsd %ymm3,%ymm1,%ymm4 - vpmaxsd %ymm3,%ymm1,%ymm1 - vpminsd %ymm2,%ymm0,%ymm3 - vpmaxsd %ymm2,%ymm0,%ymm0 - vpunpckldq %ymm1,%ymm4,%ymm5 - vpunpckldq %ymm0,%ymm3,%ymm2 - vpunpckhdq %ymm1,%ymm4,%ymm1 - vpunpckhdq %ymm0,%ymm3,%ymm0 - add $63,%rdx - vmovdqu %ymm5,(%rax) - vmovdqu %ymm1,32(%rax) - vmovdqu %ymm2,64(%rax) - vmovdqu %ymm0,96(%rax) - add $32,%rcx - sub $-128,%rax - cmp %rdx,%r14 - jg .L238 - lea -32(%r14),%rax - sub %r9,%rax - lea 31(%r9),%rdx - and $-32,%rax - cmp %rdx,%r14 - 
mov $0,%edx - cmovle %rdx,%rax - lea 32(%r9,%rax),%r9 - lea 64(,%r9,4),%r12 - mov %r9,%r11 - lea (%r15,%r9,4),%r10 - lea -32(%r12),%rbx - vzeroupper -.L235: lea -16(%r14),%rdx - sub %r9,%rdx - lea (%r15,%r12),%rsi - mov %r10,%rdi - call minmax_vector - lea 15(%r9),%rax - jmp .L237 - .p2align 4,,10 - .p2align 3 -.L220: xor %edx,%edx - call djbsort$avx2_2power - lea -40(%rbp),%rsp - pop %rbx - pop %r12 - pop %r13 - pop %r14 - pop %r15 - pop %rbp - ret - .p2align 4,,10 - .p2align 3 -.L246: mov %rsi,%r8 - mov %r13,%r11 - mov %r13,%r9 - jmp .L226 - .p2align 4,,10 - .p2align 3 -.L247: mov %r10,%r9 - jmp .L228 - .p2align 4,,10 - .p2align 3 -.L266: vmovdqa .LC4(%rip),%ymm0 - mov $16,%r12d - lea 32(%rsp),%r13 - vmovdqa %ymm0,64(%rsp) - vzeroupper - jmp .L224 - .p2align 4,,10 - .p2align 3 -.L270: cmp $63,%r14 - jle .L248 - lea -64(%r14),%rcx - shr $6,%rcx - mov %rcx,%rdx - sal $8,%rdx - mov %r15,%rax - lea 256(%r15,%rdx),%rdx - .p2align 4,,10 - .p2align 3 -.L233: vmovdqu 128(%rax),%ymm0 - vmovdqu (%rax),%ymm3 - vmovdqu 32(%rax),%ymm15 - vpminsd %ymm0,%ymm3,%ymm13 - vpmaxsd %ymm0,%ymm3,%ymm3 - vmovdqu 160(%rax),%ymm0 - vmovdqu 224(%rax),%ymm2 - vmovdqu 64(%rax),%ymm6 - vmovdqu 96(%rax),%ymm5 - vpminsd %ymm0,%ymm15,%ymm4 - vpmaxsd %ymm0,%ymm15,%ymm15 - vmovdqu 192(%rax),%ymm0 + cmp $63,%rbx + jg .L175 + cmp $32,%rbx + jne .L176 + mov %r12,%rax + mov $63,%edx +.L177: cmp %r13,%rdx + jge .L203 + vmovdqu (%rax),%ymm6 + add $64,%rdx add $256,%rax - vpminsd %ymm0,%ymm6,%ymm1 - vpmaxsd %ymm0,%ymm6,%ymm6 - vpminsd %ymm2,%ymm5,%ymm0 - vpmaxsd %ymm2,%ymm5,%ymm5 - vpminsd %ymm0,%ymm4,%ymm11 - vpminsd %ymm1,%ymm13,%ymm14 - vpmaxsd %ymm0,%ymm4,%ymm4 - vpminsd %ymm5,%ymm15,%ymm12 - vpminsd %ymm6,%ymm3,%ymm0 - vpmaxsd %ymm5,%ymm15,%ymm15 - vpmaxsd %ymm6,%ymm3,%ymm3 - vpmaxsd %ymm1,%ymm13,%ymm13 - vpminsd %ymm15,%ymm3,%ymm8 - vpminsd %ymm4,%ymm13,%ymm1 - vpminsd %ymm12,%ymm0,%ymm5 - vpmaxsd %ymm4,%ymm13,%ymm13 - vpminsd %ymm11,%ymm14,%ymm2 - vpmaxsd %ymm12,%ymm0,%ymm12 - vpmaxsd 
%ymm11,%ymm14,%ymm14 - vpmaxsd %ymm15,%ymm3,%ymm3 - vperm2i128 $32,%ymm14,%ymm2,%ymm11 - vperm2i128 $32,%ymm13,%ymm1,%ymm10 - vperm2i128 $32,%ymm12,%ymm5,%ymm9 - vperm2i128 $49,%ymm12,%ymm5,%ymm0 - vperm2i128 $32,%ymm3,%ymm8,%ymm4 - vperm2i128 $49,%ymm14,%ymm2,%ymm2 - vperm2i128 $49,%ymm13,%ymm1,%ymm1 - vperm2i128 $49,%ymm3,%ymm8,%ymm3 - vpminsd %ymm2,%ymm11,%ymm15 - vpminsd %ymm1,%ymm10,%ymm14 - vpmaxsd %ymm2,%ymm11,%ymm2 - vpmaxsd %ymm1,%ymm10,%ymm1 - vpminsd %ymm0,%ymm9,%ymm13 - vpminsd %ymm3,%ymm4,%ymm12 - vpmaxsd %ymm0,%ymm9,%ymm0 - vpmaxsd %ymm3,%ymm4,%ymm8 - vperm2i128 $49,%ymm2,%ymm15,%ymm11 - vperm2i128 $49,%ymm1,%ymm14,%ymm10 - vperm2i128 $49,%ymm0,%ymm13,%ymm9 - vperm2i128 $32,%ymm2,%ymm15,%ymm7 - vperm2i128 $32,%ymm1,%ymm14,%ymm6 - vperm2i128 $32,%ymm0,%ymm13,%ymm5 + vpminsd -128(%rax),%ymm6,%ymm10 + vpmaxsd -128(%rax),%ymm6,%ymm8 + vmovdqu -224(%rax),%ymm6 + vpminsd -96(%rax),%ymm6,%ymm3 + vpmaxsd -96(%rax),%ymm6,%ymm0 + vmovdqu -192(%rax),%ymm6 + vpminsd -64(%rax),%ymm6,%ymm2 + vpmaxsd -64(%rax),%ymm6,%ymm1 + vmovdqu -160(%rax),%ymm6 + vpmaxsd -32(%rax),%ymm6,%ymm4 + vpminsd -32(%rax),%ymm6,%ymm13 + vpminsd %ymm2,%ymm10,%ymm15 + vpminsd %ymm1,%ymm8,%ymm12 + vpminsd %ymm13,%ymm3,%ymm11 + vpminsd %ymm4,%ymm0,%ymm5 + vpmaxsd %ymm1,%ymm8,%ymm1 + vpmaxsd %ymm2,%ymm10,%ymm2 + vpmaxsd %ymm13,%ymm3,%ymm13 + vpmaxsd %ymm4,%ymm0,%ymm0 + vpminsd %ymm13,%ymm2,%ymm10 + vpminsd %ymm0,%ymm1,%ymm4 + vpminsd %ymm5,%ymm12,%ymm9 + vpminsd %ymm11,%ymm15,%ymm14 + vpmaxsd %ymm13,%ymm2,%ymm13 + vpmaxsd %ymm0,%ymm1,%ymm0 + vpmaxsd %ymm11,%ymm15,%ymm15 + vpmaxsd %ymm5,%ymm12,%ymm12 + vperm2i128 $32,%ymm13,%ymm10,%ymm6 + vperm2i128 $32,%ymm12,%ymm9,%ymm5 + vperm2i128 $32,%ymm0,%ymm4,%ymm8 + vperm2i128 $32,%ymm15,%ymm14,%ymm11 + vperm2i128 $49,%ymm0,%ymm4,%ymm0 + vperm2i128 $49,%ymm12,%ymm9,%ymm12 + vperm2i128 $49,%ymm15,%ymm14,%ymm14 + vperm2i128 $49,%ymm13,%ymm10,%ymm13 + vpminsd %ymm14,%ymm11,%ymm3 + vpminsd %ymm12,%ymm5,%ymm1 + vpminsd %ymm13,%ymm6,%ymm2 + vpmaxsd 
%ymm12,%ymm5,%ymm9 + vpmaxsd %ymm14,%ymm11,%ymm11 + vpminsd %ymm0,%ymm8,%ymm12 + vperm2i128 $32,%ymm9,%ymm1,%ymm5 + vpmaxsd %ymm0,%ymm8,%ymm8 + vpmaxsd %ymm13,%ymm6,%ymm10 + vperm2i128 $32,%ymm11,%ymm3,%ymm7 + vperm2i128 $32,%ymm10,%ymm2,%ymm6 + vperm2i128 $49,%ymm11,%ymm3,%ymm11 + vperm2i128 $49,%ymm10,%ymm2,%ymm10 + vperm2i128 $49,%ymm9,%ymm1,%ymm9 vperm2i128 $32,%ymm8,%ymm12,%ymm4 vperm2i128 $49,%ymm8,%ymm12,%ymm8 vpunpcklqdq %ymm11,%ymm7,%ymm3 @@ -2218,14 +432,14 @@ djbsort$avx2: vpmaxsd %ymm3,%ymm7,%ymm7 vpmaxsd %ymm2,%ymm6,%ymm6 vpmaxsd %ymm1,%ymm5,%ymm5 - vpmaxsd %ymm0,%ymm4,%ymm4 vpunpckldq %ymm7,%ymm11,%ymm3 - vpunpckldq %ymm6,%ymm10,%ymm2 + vpmaxsd %ymm0,%ymm4,%ymm4 vpunpckhdq %ymm7,%ymm11,%ymm7 - vpunpckhdq %ymm6,%ymm10,%ymm6 + vpunpckldq %ymm6,%ymm10,%ymm2 vpunpckldq %ymm5,%ymm9,%ymm1 - vpunpckldq %ymm4,%ymm8,%ymm0 + vpunpckhdq %ymm6,%ymm10,%ymm6 vpunpckhdq %ymm5,%ymm9,%ymm5 + vpunpckldq %ymm4,%ymm8,%ymm0 vpunpckhdq %ymm4,%ymm8,%ymm4 vpunpcklqdq %ymm7,%ymm3,%ymm10 vpunpcklqdq %ymm5,%ymm1,%ymm8 @@ -2249,9 +463,9 @@ djbsort$avx2: vpunpckhdq %ymm3,%ymm11,%ymm3 vpunpckldq %ymm1,%ymm5,%ymm6 vpunpckhdq %ymm1,%ymm5,%ymm1 + vmovdqu %ymm8,-256(%rax) vpunpckldq %ymm0,%ymm4,%ymm5 vpunpckhdq %ymm0,%ymm4,%ymm0 - vmovdqu %ymm8,-256(%rax) vmovdqu %ymm3,-224(%rax) vmovdqu %ymm7,-192(%rax) vmovdqu %ymm2,-160(%rax) @@ -2259,69 +473,1617 @@ djbsort$avx2: vmovdqu %ymm1,-96(%rax) vmovdqu %ymm5,-64(%rax) vmovdqu %ymm0,-32(%rax) - cmp %rax,%rdx - jne .L233 - lea 1(%rcx),%rax - mov %rax,%r9 - sal $6,%r9 - lea 128(,%r9,4),%rcx - sal $8,%rax - mov %r9,%r11 - lea (%r15,%rax),%r10 - lea -96(%rcx),%rbx - lea -64(%rcx),%r12 - lea 31(%r9),%r8 - vzeroupper -.L232: lea -32(%r14),%rdx + jmp .L177 +.L203: mov %r13,%rdi + mov %r13,%r9 + lea -32(%r13),%rdx + shr $6,%rdi + andq $-64,%r9 + salq $8,%rdi sub %r9,%rdx - lea (%r15,%rcx),%rsi + lea 128(%r12,%rdi),%rsi + add %r12,%rdi + call minmax_vector + jmp .L180 +.L176: xor %r10d,%r10d + cmp $16,%rbx + jne .L181 + xor %r9d,%r9d +.L180: lea 
31(%r9),%rax +.L179: cmp %r13,%rax + jge .L204 + vmovdqu -124(%r12,%rax,4),%ymm6 + vpminsd -60(%r12,%rax,4),%ymm6,%ymm5 + vpmaxsd -60(%r12,%rax,4),%ymm6,%ymm0 + vmovdqu -92(%r12,%rax,4),%ymm6 + vpminsd -28(%r12,%rax,4),%ymm6,%ymm1 + vpmaxsd -28(%r12,%rax,4),%ymm6,%ymm2 + vpminsd %ymm1,%ymm5,%ymm3 + vpminsd %ymm2,%ymm0,%ymm4 + vpmaxsd %ymm1,%ymm5,%ymm5 + vpmaxsd %ymm2,%ymm0,%ymm0 + vperm2i128 $32,%ymm0,%ymm4,%ymm2 + vperm2i128 $32,%ymm5,%ymm3,%ymm1 + vperm2i128 $49,%ymm0,%ymm4,%ymm0 + vperm2i128 $49,%ymm5,%ymm3,%ymm3 + vpminsd %ymm0,%ymm2,%ymm4 + vpmaxsd %ymm0,%ymm2,%ymm0 + vpminsd %ymm3,%ymm1,%ymm5 + vpmaxsd %ymm3,%ymm1,%ymm1 + vperm2i128 $32,%ymm0,%ymm4,%ymm2 + vperm2i128 $32,%ymm1,%ymm5,%ymm3 + vperm2i128 $49,%ymm0,%ymm4,%ymm4 + vperm2i128 $49,%ymm1,%ymm5,%ymm5 + vpunpcklqdq %ymm5,%ymm3,%ymm1 + vpunpcklqdq %ymm4,%ymm2,%ymm0 + vpunpckhqdq %ymm5,%ymm3,%ymm3 + vpunpckhqdq %ymm4,%ymm2,%ymm2 + vpminsd %ymm3,%ymm1,%ymm5 + vpmaxsd %ymm3,%ymm1,%ymm1 + vpminsd %ymm2,%ymm0,%ymm4 + vpmaxsd %ymm2,%ymm0,%ymm0 + vpunpckldq %ymm1,%ymm5,%ymm3 + vpunpckldq %ymm0,%ymm4,%ymm2 + vpunpckhdq %ymm1,%ymm5,%ymm5 + vpunpckhdq %ymm0,%ymm4,%ymm4 + vpunpcklqdq %ymm5,%ymm3,%ymm1 + vpunpcklqdq %ymm4,%ymm2,%ymm0 + vpunpckhqdq %ymm5,%ymm3,%ymm3 + vpunpckhqdq %ymm4,%ymm2,%ymm2 + vpminsd %ymm3,%ymm1,%ymm4 + vpmaxsd %ymm3,%ymm1,%ymm1 + vpminsd %ymm2,%ymm0,%ymm3 + vpmaxsd %ymm2,%ymm0,%ymm0 + vpunpckldq %ymm1,%ymm4,%ymm5 + vpunpckldq %ymm0,%ymm3,%ymm2 + vpunpckhdq %ymm1,%ymm4,%ymm1 + vpunpckhdq %ymm0,%ymm3,%ymm0 + vmovdqu %ymm5,-124(%r12,%rax,4) + vmovdqu %ymm1,-92(%r12,%rax,4) + vmovdqu %ymm2,-60(%r12,%rax,4) + vmovdqu %ymm0,-28(%r12,%rax,4) + add $32,%rax + jmp .L179 +.L204: mov %r13,%r10 + xor %edx,%edx + lea 0(,%r9,4),%rax + sub %r9,%r10 + mov %r10,%rdi + andq $-32,%r10 + shr $5,%rdi + cmp %r9,%r13 + cmovl %rdx,%r10 + salq $7,%rdi + add %r9,%r10 + cmp %r9,%r13 + cmovl %rdx,%rdi + lea -16(%r13),%rdx + sub %r10,%rdx + lea 64(%rax,%rdi),%rsi + add %rax,%rdi + add %r12,%rsi + add %r12,%rdi + call 
minmax_vector +.L181: lea 15(%r10),%rax +.L183: cmp %r13,%rax + jge .L205 + vmovdqu -60(%r12,%rax,4),%ymm6 + vpmaxsd -28(%r12,%rax,4),%ymm6,%ymm2 + vpminsd -28(%r12,%rax,4),%ymm6,%ymm1 + vperm2i128 $32,%ymm2,%ymm1,%ymm0 + vperm2i128 $49,%ymm2,%ymm1,%ymm1 + vpminsd %ymm1,%ymm0,%ymm2 + vpmaxsd %ymm1,%ymm0,%ymm0 + vperm2i128 $32,%ymm0,%ymm2,%ymm1 + vperm2i128 $49,%ymm0,%ymm2,%ymm2 + vpunpcklqdq %ymm2,%ymm1,%ymm0 + vpunpckhqdq %ymm2,%ymm1,%ymm1 + vpminsd %ymm1,%ymm0,%ymm2 + vpmaxsd %ymm1,%ymm0,%ymm0 + vpunpckldq %ymm0,%ymm2,%ymm1 + vpunpckhdq %ymm0,%ymm2,%ymm2 + vpunpcklqdq %ymm2,%ymm1,%ymm0 + vpunpckhqdq %ymm2,%ymm1,%ymm1 + vpminsd %ymm1,%ymm0,%ymm2 + vpmaxsd %ymm1,%ymm0,%ymm0 + vpunpckldq %ymm0,%ymm2,%ymm1 + vpunpckhdq %ymm0,%ymm2,%ymm0 + vmovdqu %ymm1,-60(%r12,%rax,4) + vmovdqu %ymm0,-28(%r12,%rax,4) + add $16,%rax + jmp .L183 +.L205: mov %r13,%r9 + xor %edx,%edx + lea 0(,%r10,4),%rcx + sub %r10,%r9 + mov %r9,%rax + andq $-16,%r9 + shr $4,%rax + cmp %r10,%r13 + cmovl %rdx,%r9 + salq $6,%rax + add %r10,%r9 + cmp %r10,%r13 + cmovl %rdx,%rax + lea -8(%r13),%rdx + sub %r9,%rdx + lea (%rax,%rcx),%r10 + lea 32(%rcx,%rax),%rsi + add %r12,%r10 + add %r12,%rsi mov %r10,%rdi call minmax_vector - jmp .L236 -.L272: mov (%rdi),%edx - mov 4(%rdi),%esi - mov 8(%rdi),%r9d - mov 16(%r15),%r8d - mov 12(%rdi),%edi - mov 20(%r15),%r10d - mov 24(%r15),%r11d - jmp .L207 -.L248: mov %r15,%r10 - mov $64,%r12d - mov $32,%ebx - mov $31,%r8d - mov $128,%ecx - xor %r11d,%r11d - xor %r9d,%r9d - jmp .L232 -.L276: mov (%rdi),%eax - mov 4(%rdi),%ecx - mov 8(%rdi),%esi - jmp .L215 -.L275: mov (%rdi),%edx - mov 4(%rdi),%ecx - mov 8(%rdi),%esi - mov 12(%rdi),%edi - jmp .L213 -.L274: mov (%rdi),%ecx - mov 4(%rdi),%esi - mov 8(%rdi),%r9d - mov 16(%r15),%r8d - mov 12(%rdi),%edi - jmp .L211 -.L273: mov (%rdi),%edx - mov 4(%rdi),%esi - mov 8(%rdi),%r9d - mov 16(%r15),%r8d - mov 12(%rdi),%edi - mov 20(%r15),%r10d - jmp .L209 - .endfn djbsort$avx2,globl + lea 7(%r9),%rax + cmp %r13,%rax + jge .L185 + lea 
16(,%r9,4),%rax + mov (%r10),%ecx + add $8,%r9 + lea -12(%r12,%rax),%r14 + lea (%r12,%rax),%rbx + lea 4(%r12,%rax),%r11 + mov (%rbx),%edx + lea 8(%r12,%rax),%r8 + cmp %edx,%ecx + mov %ecx,%esi + cmovg %edx,%ecx + cmovg %esi,%edx + mov %ecx,(%r10) + mov %edx,(%rbx) + mov (%r14),%ecx + mov (%r11),%edx + cmp %edx,%ecx + mov %ecx,%esi + cmovg %edx,%ecx + cmovg %esi,%edx + lea -8(%r12,%rax),%rsi + mov %ecx,(%r14) + mov %edx,(%r11) + mov (%rsi),%ecx + mov (%r8),%edx + cmp %edx,%ecx + mov %ecx,%edi + cmovg %edx,%ecx + cmovg %edi,%edx + lea 12(%r12,%rax),%rdi + mov %ecx,(%rsi) + lea -4(%r12,%rax),%rcx + mov %edx,(%r8) + mov (%rcx),%edx + mov (%rdi),%eax + cmp %eax,%edx + mov %edx,%r15d + cmovg %eax,%edx + cmovg %r15d,%eax + mov %edx,(%rcx) + mov %eax,(%rdi) + mov (%r10),%edx + mov (%rsi),%eax + cmp %eax,%edx + mov %edx,%r15d + cmovg %eax,%edx + cmovg %r15d,%eax + mov %edx,(%r10) + mov %eax,(%rsi) + mov (%rcx),%eax + mov (%r14),%edx + cmp %eax,%edx + mov %edx,%r15d + cmovg %eax,%edx + cmovg %r15d,%eax + mov %edx,(%r14) + mov %eax,(%rcx) + mov (%r10),%edx + mov (%r14),%eax + cmp %eax,%edx + mov %edx,%r15d + cmovg %eax,%edx + cmovg %r15d,%eax + mov %edx,(%r10) + mov %eax,(%r14) + mov (%rsi),%edx + mov (%rcx),%eax + cmp %eax,%edx + mov %edx,%r10d + cmovg %eax,%edx + cmovg %r10d,%eax + mov %edx,(%rsi) + mov %eax,(%rcx) + mov (%rbx),%edx + mov (%r8),%esi + mov (%rdi),%ecx + cmp %esi,%edx + mov %edx,%eax + cmovg %esi,%edx + cmovg %eax,%esi + mov (%r11),%eax + cmp %ecx,%eax + mov %eax,%r10d + cmovg %ecx,%eax + cmovg %r10d,%ecx + cmp %eax,%edx + mov %edx,%r10d + cmovg %eax,%edx + cmovg %r10d,%eax + mov %edx,(%rbx) + mov %esi,%edx + mov %eax,(%r11) + mov %ecx,%eax + cmp %eax,%edx + mov %edx,%ecx + cmovg %eax,%edx + cmovg %ecx,%eax + mov %edx,(%r8) + mov %eax,(%rdi) +.L185: lea 4(%r9),%r10 + lea -4(%r13),%rdx + lea 0(,%r10,4),%rbx + sub %r9,%rdx + lea -16(%r12,%rbx),%r11 + lea (%r12,%rbx),%rsi + mov %r11,%rdi + call minmax_vector + lea 3(%r9),%rax + cmp %r13,%rax + jge .L186 + lea 
-8(%r12,%rbx),%rcx + mov (%r11),%edx + lea -12(%r12,%rbx),%rdi + mov %r10,%r9 + mov (%rcx),%eax + cmp %eax,%edx + mov %edx,%esi + cmovg %eax,%edx + cmovg %esi,%eax + lea -4(%r12,%rbx),%rsi + mov %edx,(%r11) + mov %eax,(%rcx) + mov (%rdi),%edx + mov (%rsi),%eax + cmp %eax,%edx + mov %edx,%r8d + cmovg %eax,%edx + cmovg %r8d,%eax + mov %edx,(%rdi) + mov %eax,(%rsi) + mov (%rdi),%eax + mov (%r11),%edx + cmp %eax,%edx + mov %edx,%r8d + cmovg %eax,%edx + cmovg %r8d,%eax + mov %edx,(%r11) + mov %eax,(%rdi) + mov (%rcx),%edx + mov (%rsi),%eax + cmp %eax,%edx + mov %edx,%edi + cmovg %eax,%edx + cmovg %edi,%eax + mov %edx,(%rcx) + mov %eax,(%rsi) +.L186: lea 2(%r9),%rax + cmp %r13,%rax + jge .L187 + lea 0(,%r9,4),%rax + lea (%r12,%rax),%rsi + lea 8(%r12,%rax),%rcx + mov (%rsi),%edx + mov (%rcx),%eax + cmp %eax,%edx + mov %edx,%edi + cmovg %eax,%edx + cmovg %edi,%eax + mov %edx,(%rsi) + mov %eax,(%rcx) +.L187: lea 1(%r9),%rax + cmp %r13,%rax + jge .L147 + salq $2,%r9 + lea (%r12,%r9),%rsi + lea 4(%r12,%r9),%rcx + mov (%rsi),%edx + mov (%rcx),%eax + cmp %eax,%edx + mov %edx,%edi + cmovg %eax,%edx + cmovg %edi,%eax + mov %edx,(%rsi) + mov %eax,(%rcx) +.L147: lea -40(%rbp),%rsp + pop %rbx + pop %r12 + pop %r13 + pop %r14 + pop %r15 + pop %rbp + ret + .endfn djbsort$avx2,globl,hidden + +minmax_vector: + cmp $7,%rdx + jg .L13 +.L2: test %rdx,%rdx + jle .L15 + mov (%rdi),%ecx + mov (%rsi),%eax + add $4,%rdi + add $4,%rsi + cmp %eax,%ecx + mov %ecx,%r8d + cmovg %eax,%ecx + cmovg %r8d,%eax + decq %rdx + mov %ecx,-4(%rdi) + mov %eax,-4(%rsi) + jmp .L2 +.L15: ret +.L13: testb $7,%dl + je .L6 + lea -32(,%rdx,4),%rax + andq $-8,%rdx + lea (%rdi,%rax),%rcx + add %rsi,%rax + vmovdqu (%rax),%ymm2 + vpminsd (%rcx),%ymm2,%ymm1 + vpmaxsd (%rcx),%ymm2,%ymm0 + vmovdqu %ymm1,(%rcx) + vmovdqu %ymm0,(%rax) +.L6: xor %eax,%eax +.L7: vmovdqu (%rdi,%rax),%ymm4 + vpminsd (%rsi,%rax),%ymm4,%ymm1 + vpmaxsd (%rsi,%rax),%ymm4,%ymm0 + vmovdqu %ymm1,(%rdi,%rax) + vmovdqu %ymm0,(%rsi,%rax) + add $32,%rax + 
sub $8,%rdx + jne .L7 + ret + .endfn minmax_vector + +int32_twostages_32: + sub $-128,%rdi +.L17: lea -128(%rdi),%rax + test %rsi,%rsi + jle .L21 +.L18: vmovdqu (%rax),%ymm5 + vmovdqu 128(%rax),%ymm7 + add $32,%rax + vpminsd 352(%rax),%ymm7,%ymm3 + vpminsd 224(%rax),%ymm5,%ymm2 + vpmaxsd 224(%rax),%ymm5,%ymm0 + vpmaxsd 352(%rax),%ymm7,%ymm1 + vpminsd %ymm3,%ymm2,%ymm4 + vpmaxsd %ymm3,%ymm2,%ymm2 + vpminsd %ymm1,%ymm0,%ymm3 + vpmaxsd %ymm1,%ymm0,%ymm0 + vmovdqu %ymm4,-32(%rax) + vmovdqu %ymm2,96(%rax) + vmovdqu %ymm3,224(%rax) + vmovdqu %ymm0,352(%rax) + cmp %rax,%rdi + jne .L18 + add $-128,%rsi + add $512,%rdi + jmp .L17 +.L21: ret + .endfn int32_twostages_32 + +int32_threestages: + push %rbp + imul $-24,%rdx,%r8 + lea 0(,%rdx,8),%rax + mov %rsp,%rbp + push %r15 + push %r14 + push %r13 + push %r12 + push %rbx + andq $-32,%rsp + sub $64,%rsp + mov %rax,56(%rsp) + lea 0(,%rdx,4),%rax + lea (%rdi,%rax),%rcx + mov %rsi,8(%rsp) + lea (%rcx,%rax),%rsi + lea (%rsi,%rax),%r9 + lea (%r9,%rax),%r11 + lea (%r11,%rax),%r12 + lea (%r12,%rax),%r14 + lea (%r14,%rax),%r15 + lea (%r15,%r8),%rbx + mov %rbx,40(%rsp) + add %rax,%rbx + lea (%rbx,%rax),%r10 + mov %rbx,32(%rsp) + lea (%r10,%rax),%rbx + lea (%rbx,%rax),%r13 + lea 0(%r13,%rax),%r8 + mov %r8,24(%rsp) + add %r8,%rax + mov %rax,16(%rsp) + xor %eax,%eax +.L23: mov 56(%rsp),%r8 + add %rax,%r8 + mov %r8,48(%rsp) + cmp 8(%rsp),%r8 + jg .L28 +.L25: cmp %rdx,%rax + jge .L29 + vmovdqu (%rdi,%rax,4),%ymm3 + vmovdqu (%rsi,%rax,4),%ymm6 + vpminsd (%r11,%rax,4),%ymm3,%ymm7 + vpmaxsd (%r11,%rax,4),%ymm3,%ymm4 + vpmaxsd (%r14,%rax,4),%ymm6,%ymm0 + vmovdqu (%rcx,%rax,4),%ymm3 + vmovdqu (%rsi,%rax,4),%ymm5 + vpminsd (%r12,%rax,4),%ymm3,%ymm2 + vpmaxsd (%r12,%rax,4),%ymm3,%ymm1 + vpminsd (%r14,%rax,4),%ymm5,%ymm5 + vmovdqu (%r9,%rax,4),%ymm3 + vpminsd (%r15,%rax,4),%ymm3,%ymm6 + vpmaxsd (%r15,%rax,4),%ymm3,%ymm3 + vpminsd %ymm5,%ymm7,%ymm8 + mov 40(%rsp),%r8 + vpmaxsd %ymm5,%ymm7,%ymm5 + vpminsd %ymm6,%ymm2,%ymm7 + vpminsd %ymm7,%ymm8,%ymm9 
+ vpmaxsd %ymm6,%ymm2,%ymm2 + vpminsd %ymm0,%ymm4,%ymm6 + vpmaxsd %ymm0,%ymm4,%ymm0 + vmovdqu %ymm9,(%rdi,%rax,4) + vpminsd %ymm3,%ymm1,%ymm4 + vpmaxsd %ymm3,%ymm1,%ymm1 + vpmaxsd %ymm7,%ymm8,%ymm3 + vpminsd %ymm2,%ymm5,%ymm7 + vmovdqu %ymm3,(%r8,%rax,4) + mov 32(%rsp),%r8 + vpmaxsd %ymm2,%ymm5,%ymm2 + vpminsd %ymm4,%ymm6,%ymm5 + vpmaxsd %ymm4,%ymm6,%ymm6 + vpminsd %ymm1,%ymm0,%ymm4 + vmovdqu %ymm7,(%r8,%rax,4) + mov 24(%rsp),%r8 + vpmaxsd %ymm1,%ymm0,%ymm0 + vmovdqu %ymm2,(%r10,%rax,4) + vmovdqu %ymm5,(%rbx,%rax,4) + vmovdqu %ymm6,0(%r13,%rax,4) + vmovdqu %ymm4,(%r8,%rax,4) + mov 16(%rsp),%r8 + vmovdqu %ymm0,(%r8,%rax,4) + add $8,%rax + jmp .L25 +.L29: mov 48(%rsp),%rax + add 56(%rsp),%rdx + jmp .L23 +.L28: lea -40(%rbp),%rsp + pop %rbx + pop %r12 + pop %r13 + pop %r14 + pop %r15 + pop %rbp + ret + .endfn int32_threestages + +merge16_finish: + vpminsd %ymm1,%ymm0,%ymm3 + vpmaxsd %ymm1,%ymm0,%ymm0 + vperm2i128 $32,%ymm0,%ymm3,%ymm2 + vperm2i128 $49,%ymm0,%ymm3,%ymm0 + vpminsd %ymm0,%ymm2,%ymm1 + vpmaxsd %ymm0,%ymm2,%ymm0 + vpunpcklqdq %ymm0,%ymm1,%ymm2 + vpunpckhqdq %ymm0,%ymm1,%ymm0 + vpminsd %ymm0,%ymm2,%ymm1 + vpmaxsd %ymm0,%ymm2,%ymm2 + vpunpckldq %ymm2,%ymm1,%ymm0 + vpunpckhdq %ymm2,%ymm1,%ymm1 + vpunpcklqdq %ymm1,%ymm0,%ymm3 + vpunpckhqdq %ymm1,%ymm0,%ymm0 + vpminsd %ymm3,%ymm0,%ymm2 + vpmaxsd %ymm3,%ymm0,%ymm0 + vpunpckldq %ymm0,%ymm2,%ymm1 + vpunpckhdq %ymm0,%ymm2,%ymm0 + vperm2i128 $32,%ymm0,%ymm1,%ymm2 + vperm2i128 $49,%ymm0,%ymm1,%ymm0 + test %esi,%esi + je .L31 + vpcmpeqd %ymm1,%ymm1,%ymm1 + vpxor %ymm1,%ymm2,%ymm2 + vpxor %ymm1,%ymm0,%ymm0 +.L31: vmovdqu %ymm2,(%rdi) + vmovdqu %ymm0,32(%rdi) + ret + .endfn merge16_finish + +int32_sort_2power: + push %r13 + lea 16(%rsp),%r13 + andq $-32,%rsp + push -8(%r13) + push %rbp + mov %rsp,%rbp + push %r15 + push %r14 + push %r13 + push %r12 + mov %rdi,%r12 + push %rbx + sub $264,%rsp + mov %edx,-116(%rbp) + cmp $8,%rsi + jne .L36 + mov 4(%rdi),%edx + mov (%rdi),%r8d + mov 8(%rdi),%ecx + mov 28(%r12),%r9d + cmp 
%r8d,%edx + mov %edx,%eax + cmovg %r8d,%edx + cmovg %eax,%r8d + mov 12(%rdi),%eax + cmp %ecx,%eax + mov %eax,%esi + cmovg %ecx,%eax + cmovg %esi,%ecx + cmp %r8d,%ecx + mov %ecx,%esi + cmovg %r8d,%ecx + cmovg %esi,%r8d + cmp %edx,%eax + mov %eax,%esi + cmovg %edx,%eax + cmovg %esi,%edx + mov 20(%rdi),%esi + mov %edx,%r10d + mov 16(%rdi),%edi + cmp %r10d,%ecx + mov %ecx,%edx + cmovg %r10d,%ecx + cmovg %edx,%r10d + cmp %edi,%esi + mov %esi,%edx + cmovg %edi,%esi + cmovg %edx,%edi + mov 24(%r12),%edx + cmp %edx,%r9d + mov %r9d,%r11d + cmovg %edx,%r9d + cmovg %r11d,%edx + cmp %edi,%edx + mov %edx,%r11d + cmovg %edi,%edx + cmovg %r11d,%edi + cmp %esi,%r9d + mov %r9d,%r11d + cmovg %esi,%r9d + cmovg %r11d,%esi + cmp %esi,%edx + mov %edx,%r11d + cmovg %esi,%edx + cmovg %r11d,%esi + cmp %r8d,%edi + mov %edi,%r11d + cmovg %r8d,%edi + cmovg %r11d,%r8d + cmp %ecx,%edx + mov %edx,%r11d + cmovg %ecx,%edx + cmovg %r11d,%ecx + mov %r8d,(%r12) + cmp %ecx,%edi + mov %edi,%r11d + cmovg %ecx,%edi + cmovg %r11d,%ecx + cmp %r10d,%esi + mov %esi,%r11d + cmovg %r10d,%esi + cmovg %r11d,%r10d + cmp %eax,%r9d + mov %r9d,%r11d + cmovg %eax,%r9d + cmovg %r11d,%eax + cmp %eax,%esi + mov %esi,%r11d + cmovg %eax,%esi + cmovg %r11d,%eax + mov %r9d,28(%r12) + cmp %r10d,%ecx + mov %ecx,%r11d + cmovg %r10d,%ecx + cmovg %r11d,%r10d + cmp %eax,%edi + mov %edi,%r11d + cmovg %eax,%edi + cmovg %r11d,%eax + mov %r10d,4(%r12) + cmp %esi,%edx + mov %edx,%r11d + cmovg %esi,%edx + cmovg %r11d,%esi + mov %ecx,8(%r12) + mov %eax,12(%r12) + mov %edi,16(%r12) + mov %esi,20(%r12) + mov %edx,24(%r12) + jmp .L35 +.L36: mov %rsi,%r15 + cmp $16,%rsi + jne .L38 + vmovdqa .LC0(%rip),%ymm0 + vpxor 32(%rdi),%ymm0,%ymm2 + vpxor (%rdi),%ymm0,%ymm0 + vmovdqa .LC1(%rip),%ymm4 + cmp $0,-116(%rbp) + vpunpckldq %ymm2,%ymm0,%ymm1 + vpunpckhdq %ymm2,%ymm0,%ymm0 + vpunpcklqdq %ymm0,%ymm1,%ymm3 + vpunpckhqdq %ymm0,%ymm1,%ymm1 + vpminsd %ymm3,%ymm1,%ymm2 + vpmaxsd %ymm3,%ymm1,%ymm1 + vpxor %ymm4,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm1 + 
vpunpckldq %ymm1,%ymm2,%ymm0 + vpunpckhdq %ymm1,%ymm2,%ymm1 + vpminsd %ymm1,%ymm0,%ymm3 + vpmaxsd %ymm1,%ymm0,%ymm1 + vpunpcklqdq %ymm1,%ymm3,%ymm2 + vpunpckhqdq %ymm1,%ymm3,%ymm3 + vpunpckldq %ymm3,%ymm2,%ymm1 + vpunpckhdq %ymm3,%ymm2,%ymm2 + vpunpcklqdq %ymm2,%ymm1,%ymm0 + vpunpckhqdq %ymm2,%ymm1,%ymm1 + vpminsd %ymm0,%ymm1,%ymm2 + vpmaxsd %ymm0,%ymm1,%ymm1 + vpunpckldq %ymm1,%ymm2,%ymm0 + vpunpckhdq %ymm1,%ymm2,%ymm1 + vpxor %ymm4,%ymm1,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vperm2i128 $32,%ymm1,%ymm0,%ymm3 + vperm2i128 $49,%ymm1,%ymm0,%ymm0 + vpminsd %ymm3,%ymm0,%ymm2 + vpmaxsd %ymm3,%ymm0,%ymm0 + vperm2i128 $32,%ymm0,%ymm2,%ymm1 + vperm2i128 $49,%ymm0,%ymm2,%ymm0 + vpminsd %ymm1,%ymm0,%ymm3 + vpmaxsd %ymm1,%ymm0,%ymm2 + vpunpcklqdq %ymm2,%ymm3,%ymm1 + vpunpckhqdq %ymm2,%ymm3,%ymm2 + vpunpckldq %ymm2,%ymm1,%ymm0 + vpunpckhdq %ymm2,%ymm1,%ymm2 + vpunpcklqdq %ymm2,%ymm0,%ymm1 + vpunpckhqdq %ymm2,%ymm0,%ymm0 + vpminsd %ymm1,%ymm0,%ymm2 + vpmaxsd %ymm1,%ymm0,%ymm0 + vpunpckldq %ymm0,%ymm2,%ymm1 + vpunpckhdq %ymm0,%ymm2,%ymm0 + vpunpcklqdq %ymm0,%ymm1,%ymm2 + vpunpckhqdq %ymm0,%ymm1,%ymm1 + vpcmpeqd %ymm0,%ymm0,%ymm0 + je .L39 + vpxor %ymm0,%ymm1,%ymm1 + jmp .L40 +.L39: vpxor %ymm0,%ymm2,%ymm2 +.L40: mov -116(%rbp),%esi + vmovdqa %ymm2,%ymm0 + mov %r12,%rdi + jmp .L134 +.L38: cmp $32,%rsi + jne .L41 + mov $1,%edx + mov $16,%esi + lea 64(%r12),%r13 + call int32_sort_2power + xor %edx,%edx + mov $16,%esi + mov %r13,%rdi + call int32_sort_2power + cmp $0,-116(%rbp) + vmovdqu (%r12),%ymm4 + vmovdqu 32(%r12),%ymm1 + vmovdqu 64(%r12),%ymm2 + vmovdqu 96(%r12),%ymm3 + je .L42 + vpcmpeqd %ymm0,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm0,%ymm2,%ymm2 + vpxor %ymm0,%ymm3,%ymm3 +.L42: mov -116(%rbp),%esi + vpmaxsd %ymm1,%ymm3,%ymm5 + vpminsd %ymm4,%ymm2,%ymm0 + mov %r12,%rdi + vpmaxsd %ymm4,%ymm2,%ymm4 + vpminsd %ymm1,%ymm3,%ymm1 + vmovdqa %ymm5,-80(%rbp) + vmovdqa %ymm4,-112(%rbp) + call merge16_finish + vmovdqa -80(%rbp),%ymm5 + mov -116(%rbp),%esi 
+ mov %r13,%rdi + vmovdqa -112(%rbp),%ymm4 + vmovdqa %ymm5,%ymm1 + vmovdqa %ymm4,%ymm0 +.L134: add $264,%rsp + pop %rbx + pop %r12 + pop %r13 + pop %r14 + pop %r15 + pop %rbp + lea -16(%r13),%rsp + pop %r13 + jmp merge16_finish +.L41: mov %rsi,%rax + sar $3,%rax + mov %rax,-80(%rbp) + lea 0(,%rax,4),%r13 + salq $3,%rax + imul $-20,-80(%rbp),%rdx + lea (%rdi,%rax),%rdi + lea (%rdi,%rax),%rsi + lea (%rsi,%rax),%rcx + add %rcx,%rdx + lea (%rdx,%rax),%r9 + lea (%r9,%rax),%r8 + add %r8,%rax + mov %rax,-136(%rbp) + mov %rax,%r10 + xor %eax,%eax +.L43: cmp -80(%rbp),%rax + jge .L135 + add $32,%rdi + add $32,%rsi + add $32,%rcx + add $32,%rdx + vmovdqu (%r12,%rax,4),%ymm5 + add $32,%r9 + add $32,%r8 + add $32,%r10 + vpminsd -32(%rsi),%ymm5,%ymm4 + vpmaxsd -32(%rsi),%ymm5,%ymm2 + vmovdqu -32(%rdi),%ymm5 + vpminsd -32(%rcx),%ymm5,%ymm1 + vpmaxsd -32(%rcx),%ymm5,%ymm0 + vpminsd %ymm2,%ymm0,%ymm3 + vpmaxsd %ymm2,%ymm0,%ymm0 + vpminsd %ymm4,%ymm1,%ymm2 + vpmaxsd %ymm4,%ymm1,%ymm1 + vmovdqu %ymm0,(%r12,%rax,4) + add $8,%rax + vpminsd %ymm1,%ymm3,%ymm4 + vpmaxsd %ymm1,%ymm3,%ymm1 + vmovdqu %ymm4,-32(%rdi) + vmovdqu %ymm1,-32(%rsi) + vmovdqu %ymm2,-32(%rcx) + vmovdqu -32(%r8),%ymm5 + vmovdqu -32(%r10),%ymm6 + vpminsd -32(%rdx),%ymm5,%ymm1 + vpminsd -32(%r9),%ymm6,%ymm3 + vpmaxsd -32(%r9),%ymm6,%ymm2 + vpmaxsd -32(%rdx),%ymm5,%ymm0 + vpminsd %ymm3,%ymm1,%ymm4 + vpmaxsd %ymm3,%ymm1,%ymm1 + vpminsd %ymm2,%ymm0,%ymm3 + vpmaxsd %ymm2,%ymm0,%ymm0 + vmovdqu %ymm4,-32(%rdx) + vpminsd %ymm1,%ymm3,%ymm2 + vpmaxsd %ymm1,%ymm3,%ymm1 + vmovdqu %ymm1,-32(%r9) + vmovdqu %ymm2,-32(%r8) + vmovdqu %ymm0,-32(%r10) + jmp .L43 +.L135: imul $-24,-80(%rbp),%rax + mov %rax,-128(%rbp) + cmp $127,%r15 + jg .L105 +.L63: lea (%r12,%r15,4),%rax + vmovdqa .LC1(%rip),%ymm10 + movl $3,-272(%rbp) + mov $4,%r14d + mov %rax,-144(%rbp) + mov %r15,%rax + vmovdqa .LC3(%rip),%ymm11 + sar $4,%rax + vmovdqa .LC2(%rip),%ymm12 + mov %rax,-112(%rbp) + mov -136(%rbp),%rax + add -128(%rbp),%rax + mov %rax,-200(%rbp) + add 
%r13,%rax + mov %rax,-192(%rbp) + add %r13,%rax + mov %rax,-184(%rbp) + add %r13,%rax + mov %rax,-176(%rbp) + add %r13,%rax + mov %rax,-168(%rbp) + add %r13,%rax + mov %rax,-160(%rbp) + add %r13,%rax + mov %rax,-152(%rbp) + jmp .L46 +.L105: xor %eax,%eax + vpcmpeqd %ymm0,%ymm0,%ymm0 +.L45: vpxor 64(%r12,%rax,4),%ymm0,%ymm1 + vpxor (%r12,%rax,4),%ymm0,%ymm2 + vmovdqu %ymm1,64(%r12,%rax,4) + vmovdqu %ymm2,(%r12,%rax,4) + add $32,%rax + cmp %rax,%r15 + jg .L45 + mov -136(%rbp),%r14 + add -128(%rbp),%r14 + mov $8,%ebx + vpcmpeqd %ymm10,%ymm10,%ymm10 + lea (%r14,%r13),%rax + mov %rax,-296(%rbp) + add %r13,%rax + lea (%rax,%r13),%r11 + mov %rax,-176(%rbp) + lea (%r11,%r13),%rax + mov %rax,-288(%rbp) + add %r13,%rax + mov %rax,-144(%rbp) + add %r13,%rax + mov %rax,-112(%rbp) + add -128(%rbp),%rax + mov %rax,-200(%rbp) + add %r13,%rax + mov %rax,-192(%rbp) + add %r13,%rax + mov %rax,-184(%rbp) + add %r13,%rax + mov %rax,-168(%rbp) + add %r13,%rax + mov %rax,-160(%rbp) + add %r13,%rax + mov %rax,-152(%rbp) + add %r13,%rax + mov %rax,-280(%rbp) +.L64: mov %rbx,%rcx + sarq %rcx +.L47: cmp $127,%rcx + jle .L136 + mov %rcx,%rdx + mov %r15,%rsi + mov %r12,%rdi + mov %r11,-272(%rbp) + sar $2,%rdx + mov %rcx,-240(%rbp) + call int32_threestages + mov -240(%rbp),%rcx + mov -272(%rbp),%r11 + vpcmpeqd %ymm10,%ymm10,%ymm10 + sar $3,%rcx + jmp .L47 +.L136: cmp $64,%rcx + jne .L49 + mov %r15,%rsi + mov %r12,%rdi + mov %r11,-240(%rbp) + call int32_twostages_32 + mov -240(%rbp),%r11 + vpcmpeqd %ymm10,%ymm10,%ymm10 +.L54: xor %eax,%eax + jmp .L50 +.L49: cmp $32,%rcx + jne .L51 + mov %r12,%rax + xor %edx,%edx +.L52: vmovdqu (%rax),%ymm7 + vmovdqu 32(%rax),%ymm5 + add $64,%rdx + add $256,%rax + vpminsd -128(%rax),%ymm7,%ymm8 + vpmaxsd -128(%rax),%ymm7,%ymm4 + vpminsd -96(%rax),%ymm5,%ymm1 + vpmaxsd -96(%rax),%ymm5,%ymm0 + vmovdqu -192(%rax),%ymm6 + vmovdqu -160(%rax),%ymm7 + vpminsd -64(%rax),%ymm6,%ymm5 + vpmaxsd -32(%rax),%ymm7,%ymm2 + vpmaxsd -64(%rax),%ymm6,%ymm3 + vmovdqu 
-160(%rax),%ymm6 + vpminsd -32(%rax),%ymm6,%ymm6 + vpminsd %ymm5,%ymm8,%ymm7 + vpmaxsd %ymm5,%ymm8,%ymm5 + vpminsd %ymm6,%ymm1,%ymm8 + vpmaxsd %ymm6,%ymm1,%ymm1 + vpminsd %ymm3,%ymm4,%ymm6 + vpmaxsd %ymm3,%ymm4,%ymm3 + vpminsd %ymm2,%ymm0,%ymm4 + vpmaxsd %ymm2,%ymm0,%ymm0 + vpminsd %ymm8,%ymm7,%ymm9 + vpmaxsd %ymm8,%ymm7,%ymm2 + vpminsd %ymm1,%ymm5,%ymm7 + vpmaxsd %ymm1,%ymm5,%ymm1 + vmovdqu %ymm9,-256(%rax) + vpminsd %ymm4,%ymm6,%ymm5 + vpmaxsd %ymm4,%ymm6,%ymm6 + vmovdqu %ymm2,-224(%rax) + vpminsd %ymm0,%ymm3,%ymm4 + vpmaxsd %ymm0,%ymm3,%ymm3 + vmovdqu %ymm5,-128(%rax) + vmovdqu %ymm7,-192(%rax) + vmovdqu %ymm1,-160(%rax) + vmovdqu %ymm6,-96(%rax) + vmovdqu %ymm4,-64(%rax) + vmovdqu %ymm3,-32(%rax) + cmp %rdx,%r15 + jg .L52 +.L56: lea (%rbx,%rbx),%rdx + xor %ecx,%ecx + cmp -80(%rbp),%rdx + setne %al + sete %cl + mov %rdx,%r8 + xor %esi,%esi + movzbl %al,%eax + mov %eax,-204(%rbp) + jmp .L53 +.L51: cmp $16,%rcx + jne .L131 + jmp .L54 +.L50: vmovdqu (%r12,%rax,4),%ymm5 + vmovdqu 32(%r12,%rax,4),%ymm6 + vpminsd 64(%r12,%rax,4),%ymm5,%ymm2 + vpminsd 96(%r12,%rax,4),%ymm6,%ymm3 + vpmaxsd 64(%r12,%rax,4),%ymm5,%ymm0 + vpmaxsd 96(%r12,%rax,4),%ymm6,%ymm1 + vpminsd %ymm3,%ymm2,%ymm4 + vpmaxsd %ymm3,%ymm2,%ymm2 + vpminsd %ymm1,%ymm0,%ymm3 + vpmaxsd %ymm1,%ymm0,%ymm0 + vmovdqu %ymm4,(%r12,%rax,4) + vmovdqu %ymm2,32(%r12,%rax,4) + vmovdqu %ymm3,64(%r12,%rax,4) + vmovdqu %ymm0,96(%r12,%rax,4) + add $32,%rax + cmp %rax,%r15 + jg .L50 + jmp .L56 +.L131: cmp $8,%rcx + jne .L56 + xor %eax,%eax +.L57: vmovdqu 32(%r12,%rax,4),%ymm7 + vpmaxsd (%r12,%rax,4),%ymm7,%ymm0 + vpminsd (%r12,%rax,4),%ymm7,%ymm1 + vmovdqu %ymm0,32(%r12,%rax,4) + vmovdqu %ymm1,(%r12,%rax,4) + add $16,%rax + cmp %rax,%r15 + jg .L57 + jmp .L56 +.L59: mov -176(%rbp),%r10 + vmovdqu (%r12,%rax,4),%ymm5 + vpminsd (%r14,%rax,4),%ymm5,%ymm6 + vpmaxsd (%r14,%rax,4),%ymm5,%ymm15 + vmovdqu (%r10,%rax,4),%ymm5 + mov -296(%rbp),%r10 + vmovdqu (%r10,%rax,4),%ymm7 + mov -288(%rbp),%r10 + vmovdqa %ymm5,-240(%rbp) + vmovdqa 
%ymm7,-272(%rbp) + vmovdqu (%r10,%rax,4),%ymm7 + mov -112(%rbp),%r10 + vmovdqa -272(%rbp),%ymm5 + vpminsd -240(%rbp),%ymm5,%ymm1 + vpmaxsd -240(%rbp),%ymm5,%ymm5 + vmovdqa %ymm7,-240(%rbp) + vmovdqa -240(%rbp),%ymm4 + vpmaxsd (%r11,%rax,4),%ymm4,%ymm0 + vmovdqu (%r10,%rax,4),%ymm4 + vpminsd %ymm1,%ymm6,%ymm8 + mov -144(%rbp),%r10 + vmovdqa -240(%rbp),%ymm7 + vpmaxsd %ymm1,%ymm6,%ymm6 + vpminsd %ymm5,%ymm15,%ymm1 + vmovdqa %ymm4,-240(%rbp) + vpminsd (%r11,%rax,4),%ymm7,%ymm7 + vpmaxsd %ymm5,%ymm15,%ymm15 + vmovdqu (%r10,%rax,4),%ymm4 + vmovdqa %ymm4,-272(%rbp) + vmovdqa -272(%rbp),%ymm4 + vpminsd -240(%rbp),%ymm4,%ymm3 + vpmaxsd -240(%rbp),%ymm4,%ymm4 + vpminsd %ymm3,%ymm7,%ymm2 + vpmaxsd %ymm3,%ymm7,%ymm3 + vpminsd %ymm4,%ymm0,%ymm7 + vpmaxsd %ymm4,%ymm0,%ymm0 + vpminsd %ymm2,%ymm8,%ymm14 + vpminsd %ymm7,%ymm1,%ymm13 + vpminsd %ymm3,%ymm6,%ymm12 + vpminsd %ymm0,%ymm15,%ymm11 + vmovdqa %ymm14,%ymm9 + vpmaxsd %ymm3,%ymm6,%ymm6 + vpmaxsd %ymm2,%ymm8,%ymm2 + vmovdqa %ymm13,%ymm8 + vpmaxsd %ymm7,%ymm1,%ymm1 + vpmaxsd %ymm0,%ymm15,%ymm0 + vmovdqa %ymm6,-240(%rbp) + vmovdqa %ymm2,%ymm5 + vmovdqa -240(%rbp),%ymm3 + vmovdqa %ymm1,%ymm4 + vmovdqa %ymm12,%ymm7 + vmovdqa %ymm11,%ymm6 + vmovdqa %ymm0,%ymm15 + test %ecx,%ecx + je .L58 + vpxor %ymm14,%ymm10,%ymm9 + vpxor %ymm13,%ymm10,%ymm8 + vpxor %ymm12,%ymm10,%ymm7 + vpxor %ymm11,%ymm10,%ymm6 + vpxor %ymm2,%ymm10,%ymm5 + vpxor %ymm1,%ymm10,%ymm4 + vpxor %ymm3,%ymm10,%ymm3 + vpxor %ymm0,%ymm10,%ymm15 +.L58: mov -200(%rbp),%r10 + vmovdqu %ymm9,(%r12,%rax,4) + vmovdqu %ymm8,(%r10,%rax,4) + mov -192(%rbp),%r10 + vmovdqu %ymm7,(%r10,%rax,4) + mov -184(%rbp),%r10 + vmovdqu %ymm6,(%r10,%rax,4) + mov -168(%rbp),%r10 + vmovdqu %ymm5,(%r10,%rax,4) + mov -160(%rbp),%r10 + vmovdqu %ymm4,(%r10,%rax,4) + mov -152(%rbp),%r10 + vmovdqu %ymm3,(%r10,%rax,4) + mov -280(%rbp),%r10 + vmovdqu %ymm15,(%r10,%rax,4) + add $8,%rax +.L60: cmp %rax,%rdi + jg .L59 + xor $1,%ecx + lea (%rdx,%r9),%rdi +.L62: mov %rdi,%r9 + sub %rbx,%r9 + mov %r9,%rax + cmp 
%r9,%r8 + jg .L60 + xor -204(%rbp),%ecx + add %rdx,%rsi + add %rdx,%r8 +.L53: cmp -80(%rbp),%rsi + jge .L61 + lea (%rsi,%rbx),%rdi + jmp .L62 +.L61: salq $4,%rbx + cmp %r15,%rbx + je .L63 + mov %rdx,%rbx + jmp .L64 +.L46: cmp $4,%r14 + jne .L132 + mov %r12,%rax +.L65: cmp -144(%rbp),%rax + je .L72 + vpxor 32(%rax),%ymm12,%ymm0 + vpxor (%rax),%ymm12,%ymm1 + add $64,%rax + vmovdqu %ymm1,-64(%rax) + vmovdqu %ymm0,-32(%rax) + jmp .L65 +.L72: mov -112(%rbp),%rbx + jmp .L68 +.L132: mov %r12,%rax + cmp $2,%r14 + jne .L70 +.L69: cmp -144(%rbp),%rax + je .L72 + vpxor 32(%rax),%ymm10,%ymm2 + vpxor (%rax),%ymm10,%ymm1 + add $64,%rax + vperm2i128 $32,%ymm2,%ymm1,%ymm0 + vperm2i128 $49,%ymm2,%ymm1,%ymm1 + vpminsd %ymm1,%ymm0,%ymm2 + vpmaxsd %ymm1,%ymm0,%ymm0 + vperm2i128 $32,%ymm0,%ymm2,%ymm1 + vperm2i128 $49,%ymm0,%ymm2,%ymm0 + vmovdqu %ymm1,-64(%rax) + vmovdqu %ymm0,-32(%rax) + jmp .L69 +.L70: cmp -144(%rbp),%rax + je .L72 + vpxor 32(%rax),%ymm11,%ymm2 + vpxor (%rax),%ymm11,%ymm1 + add $64,%rax + vperm2i128 $32,%ymm2,%ymm1,%ymm0 + vperm2i128 $49,%ymm2,%ymm1,%ymm1 + vpunpcklqdq %ymm1,%ymm0,%ymm2 + vpunpckhqdq %ymm1,%ymm0,%ymm0 + vpminsd %ymm0,%ymm2,%ymm1 + vpmaxsd %ymm0,%ymm2,%ymm2 + vpunpcklqdq %ymm2,%ymm1,%ymm0 + vpunpckhqdq %ymm2,%ymm1,%ymm1 + vpminsd %ymm1,%ymm0,%ymm2 + vpmaxsd %ymm1,%ymm0,%ymm0 + vperm2i128 $32,%ymm0,%ymm2,%ymm1 + vperm2i128 $49,%ymm0,%ymm2,%ymm0 + vmovdqu %ymm1,-64(%rax) + vmovdqu %ymm0,-32(%rax) + jmp .L70 +.L137: cmp $32,%rbx + jne .L75 +.L74: mov %rbx,%rdx + mov %r15,%rsi + mov %r12,%rdi + sar $3,%rbx + sar $2,%rdx + call int32_threestages + vmovdqa .LC2(%rip),%ymm12 + vmovdqa .LC3(%rip),%ymm11 + vmovdqa .LC1(%rip),%ymm10 +.L68: cmp $127,%rbx + jle .L137 + jmp .L74 +.L139: sar $2,%rbx +.L75: cmp $15,%rbx + jle .L138 + mov %rbx,%rcx + xor %esi,%esi + sarq %rcx + imul $-8,%rcx,%rdi + lea 0(,%rcx,4),%rdx + lea (%r12,%rdx),%r11 + lea (%r11,%rdx),%r10 + lea (%r10,%rdx),%r8 + lea (%rdi,%r8),%rax + lea (%rax,%rdx),%r9 + mov %rax,-136(%rbp) + lea 
(%r9,%rdx),%rax + mov %rax,-240(%rbp) +.L76: cmp %r15,%rsi + jge .L139 + mov %rsi,%rax +.L78: cmp %rcx,%rax + jge .L140 + vmovdqu (%r12,%rax,4),%ymm6 + vmovdqu (%r11,%rax,4),%ymm5 + vpminsd (%r10,%rax,4),%ymm6,%ymm2 + vpminsd (%r8,%rax,4),%ymm5,%ymm3 + mov -136(%rbp),%rdi + vpmaxsd (%r10,%rax,4),%ymm6,%ymm0 + vpmaxsd (%r8,%rax,4),%ymm5,%ymm1 + vpminsd %ymm3,%ymm2,%ymm4 + vpmaxsd %ymm3,%ymm2,%ymm2 + vmovdqu %ymm4,(%r12,%rax,4) + vmovdqu %ymm2,(%rdi,%rax,4) + mov -240(%rbp),%rdi + vpminsd %ymm1,%ymm0,%ymm3 + vpmaxsd %ymm1,%ymm0,%ymm0 + vmovdqu %ymm3,(%r9,%rax,4) + vmovdqu %ymm0,(%rdi,%rax,4) + add $8,%rax + jmp .L78 +.L140: add %rdx,%rsi + add %rdx,%rcx + jmp .L76 +.L138: cmp $8,%rbx + je .L109 +.L83: mov -152(%rbp),%rdx + mov -160(%rbp),%rcx + xor %eax,%eax + mov -168(%rbp),%rsi + mov -176(%rbp),%rdi + mov -184(%rbp),%r8 + mov -192(%rbp),%r9 + mov -200(%rbp),%r10 + jmp .L81 +.L109: xor %eax,%eax +.L80: cmp %r15,%rax + jge .L83 + vmovdqu (%r12,%rax,4),%ymm5 + vpminsd 32(%r12,%rax,4),%ymm5,%ymm1 + vpmaxsd 32(%r12,%rax,4),%ymm5,%ymm0 + vmovdqu %ymm1,(%r12,%rax,4) + vmovdqu %ymm0,32(%r12,%rax,4) + add $16,%rax + jmp .L80 +.L81: cmp -80(%rbp),%rax + jge .L141 + vmovdqu (%rdi),%ymm7 + add $32,%r10 + add $32,%r9 + add $32,%r8 + add $32,%rdi + add $32,%rsi + add $32,%rcx + add $32,%rdx + vmovdqu (%r12,%rax,4),%ymm5 + vmovdqu -32(%r9),%ymm6 + vpminsd -32(%r10),%ymm5,%ymm3 + vpmaxsd -32(%r10),%ymm5,%ymm1 + vpminsd -32(%r8),%ymm6,%ymm2 + vpmaxsd -32(%r8),%ymm6,%ymm0 + vpminsd -32(%rsi),%ymm7,%ymm7 + vmovdqu -32(%rcx),%ymm5 + vmovdqu -32(%rdi),%ymm6 + vpmaxsd -32(%rdx),%ymm5,%ymm4 + vpminsd %ymm2,%ymm3,%ymm9 + vpmaxsd -32(%rsi),%ymm6,%ymm8 + vpminsd -32(%rdx),%ymm5,%ymm6 + vpminsd %ymm0,%ymm1,%ymm13 + vpmaxsd %ymm2,%ymm3,%ymm2 + vpminsd %ymm6,%ymm7,%ymm5 + vpminsd %ymm4,%ymm8,%ymm3 + vpmaxsd %ymm6,%ymm7,%ymm6 + vpmaxsd %ymm0,%ymm1,%ymm0 + vpmaxsd %ymm4,%ymm8,%ymm4 + vpminsd %ymm5,%ymm9,%ymm1 + vpminsd %ymm6,%ymm2,%ymm8 + vpminsd %ymm3,%ymm13,%ymm7 + vmovdqu %ymm1,(%r12,%rax,4) 
+ add $8,%rax + vpmaxsd %ymm6,%ymm2,%ymm2 + vpmaxsd %ymm5,%ymm9,%ymm5 + vmovdqu %ymm7,-32(%r10) + vpminsd %ymm4,%ymm0,%ymm6 + vpmaxsd %ymm3,%ymm13,%ymm3 + vmovdqu %ymm8,-32(%r9) + vpmaxsd %ymm4,%ymm0,%ymm0 + vmovdqu %ymm6,-32(%r8) + vmovdqu %ymm5,-32(%rdi) + vmovdqu %ymm3,-32(%rsi) + vmovdqu %ymm2,-32(%rcx) + vmovdqu %ymm0,-32(%rdx) + jmp .L81 +.L141: sarq %r14 + decl -272(%rbp) + jne .L46 + mov %r12,%rax + xor %edx,%edx + vpcmpeqd %ymm5,%ymm5,%ymm5 +.L85: cmp %r15,%rdx + jge .L89 + vmovdqu (%rax),%ymm7 + vpunpckldq 32(%rax),%ymm7,%ymm12 + vpunpckhdq 32(%rax),%ymm7,%ymm6 + vmovdqu 64(%rax),%ymm7 + vpunpckldq 96(%rax),%ymm7,%ymm2 + vpunpckhdq 96(%rax),%ymm7,%ymm4 + vmovdqu 128(%rax),%ymm7 + vpunpckldq 160(%rax),%ymm7,%ymm1 + vpunpckhdq 160(%rax),%ymm7,%ymm0 + vpunpcklqdq %ymm2,%ymm12,%ymm8 + vpunpcklqdq %ymm4,%ymm6,%ymm9 + cmp $0,-116(%rbp) + vmovdqu 192(%rax),%ymm7 + vpunpckhqdq %ymm2,%ymm12,%ymm12 + vpunpckhqdq %ymm4,%ymm6,%ymm4 + vpunpckldq 224(%rax),%ymm7,%ymm10 + vpunpckhdq 224(%rax),%ymm7,%ymm3 + vpunpcklqdq %ymm10,%ymm1,%ymm11 + vpunpckhqdq %ymm10,%ymm1,%ymm1 + vpunpcklqdq %ymm3,%ymm0,%ymm7 + vpunpckhqdq %ymm3,%ymm0,%ymm0 + je .L86 + vpxor %ymm5,%ymm12,%ymm12 + vpxor %ymm5,%ymm4,%ymm4 + vpxor %ymm5,%ymm1,%ymm1 + vpxor %ymm5,%ymm0,%ymm0 + jmp .L87 +.L86: vpxor %ymm5,%ymm8,%ymm8 + vpxor %ymm5,%ymm9,%ymm9 + vpxor %ymm5,%ymm11,%ymm11 + vpxor %ymm5,%ymm7,%ymm7 +.L87: vperm2i128 $32,%ymm11,%ymm8,%ymm3 + vperm2i128 $32,%ymm1,%ymm12,%ymm6 + vperm2i128 $32,%ymm7,%ymm9,%ymm10 + add $64,%rdx + vperm2i128 $32,%ymm0,%ymm4,%ymm13 + vperm2i128 $49,%ymm11,%ymm8,%ymm11 + vperm2i128 $49,%ymm7,%ymm9,%ymm9 + add $256,%rax + vperm2i128 $49,%ymm1,%ymm12,%ymm1 + vperm2i128 $49,%ymm0,%ymm4,%ymm0 + vpmaxsd %ymm6,%ymm3,%ymm2 + vpminsd %ymm6,%ymm3,%ymm4 + vpminsd %ymm1,%ymm11,%ymm7 + vpmaxsd %ymm13,%ymm10,%ymm3 + vpminsd %ymm13,%ymm10,%ymm8 + vpmaxsd %ymm1,%ymm11,%ymm1 + vpminsd %ymm0,%ymm9,%ymm10 + vpmaxsd %ymm0,%ymm9,%ymm0 + vpminsd %ymm8,%ymm4,%ymm11 + vpminsd %ymm3,%ymm2,%ymm9 + 
vpmaxsd %ymm8,%ymm4,%ymm8 + vpminsd %ymm10,%ymm7,%ymm6 + vpmaxsd %ymm10,%ymm7,%ymm4 + vpmaxsd %ymm3,%ymm2,%ymm2 + vpminsd %ymm0,%ymm1,%ymm3 + vpmaxsd %ymm0,%ymm1,%ymm1 + vpminsd %ymm6,%ymm11,%ymm10 + vpmaxsd %ymm6,%ymm11,%ymm0 + vpminsd %ymm3,%ymm9,%ymm7 + vpmaxsd %ymm3,%ymm9,%ymm6 + vpminsd %ymm4,%ymm8,%ymm3 + vpminsd %ymm1,%ymm2,%ymm9 + vpmaxsd %ymm4,%ymm8,%ymm4 + vpunpckldq %ymm7,%ymm10,%ymm8 + vpmaxsd %ymm1,%ymm2,%ymm2 + vpunpckhdq %ymm7,%ymm10,%ymm7 + vpunpckldq %ymm9,%ymm3,%ymm1 + vpunpckhdq %ymm9,%ymm3,%ymm3 + vpunpckldq %ymm6,%ymm0,%ymm9 + vpunpckhdq %ymm6,%ymm0,%ymm6 + vpunpckldq %ymm2,%ymm4,%ymm0 + vpunpckhdq %ymm2,%ymm4,%ymm2 + vpunpcklqdq %ymm3,%ymm7,%ymm10 + vpunpcklqdq %ymm1,%ymm8,%ymm4 + vpunpcklqdq %ymm0,%ymm9,%ymm13 + vpunpckhqdq %ymm1,%ymm8,%ymm8 + vpunpckhqdq %ymm3,%ymm7,%ymm3 + vpunpckhqdq %ymm0,%ymm9,%ymm1 + vpunpcklqdq %ymm2,%ymm6,%ymm7 + vpunpckhqdq %ymm2,%ymm6,%ymm0 + vperm2i128 $32,%ymm13,%ymm4,%ymm12 + vperm2i128 $32,%ymm1,%ymm8,%ymm11 + vperm2i128 $32,%ymm0,%ymm3,%ymm6 + vperm2i128 $32,%ymm7,%ymm10,%ymm9 + vperm2i128 $49,%ymm13,%ymm4,%ymm4 + vmovdqu %ymm12,-256(%rax) + vperm2i128 $49,%ymm1,%ymm8,%ymm1 + vperm2i128 $49,%ymm7,%ymm10,%ymm2 + vperm2i128 $49,%ymm0,%ymm3,%ymm0 + vmovdqu %ymm11,-224(%rax) + vmovdqu %ymm9,-192(%rax) + vmovdqu %ymm6,-160(%rax) + vmovdqu %ymm4,-128(%rax) + vmovdqu %ymm1,-96(%rax) + vmovdqu %ymm2,-64(%rax) + vmovdqu %ymm0,-32(%rax) + jmp .L85 +.L142: cmp $32,-112(%rbp) + jne .L94 +.L93: mov -112(%rbp),%rcx + sar $2,%rcx + lea 0(,%rcx,4),%rdx + lea 0(,%rcx,8),%rax + mov %rcx,-136(%rbp) + lea (%r12,%rdx),%r9 + mov %rax,-184(%rbp) + imul $-24,%rcx,%rax + lea (%r9,%rdx),%r14 + lea (%r14,%rdx),%rsi + lea (%rsi,%rdx),%rbx + lea (%rbx,%rdx),%r10 + lea (%r10,%rdx),%r8 + lea (%r8,%rdx),%rdi + add %rdi,%rax + mov %rax,-176(%rbp) + add %rdx,%rax + mov %rax,-168(%rbp) + add %rdx,%rax + lea (%rax,%rdx),%r11 + mov %rax,-160(%rbp) + lea (%r11,%rdx),%rax + mov %rax,-200(%rbp) + add %rdx,%rax + add %rax,%rdx + mov %rax,-144(%rbp) + 
mov %rdx,-192(%rbp) +.L90: mov -136(%rbp),%rax + sub %rcx,%rax + cmp %rax,%r15 + jg .L92 + sarq $3,-112(%rbp) +.L89: cmp $127,-112(%rbp) + jle .L142 + jmp .L93 +.L92: cmp -136(%rbp),%rax + jge .L143 + vmovdqu (%r12,%rax,4),%ymm6 + vpminsd (%rbx,%rax,4),%ymm6,%ymm7 + vpmaxsd (%rbx,%rax,4),%ymm6,%ymm4 + vmovdqu (%r9,%rax,4),%ymm6 + vpminsd (%r10,%rax,4),%ymm6,%ymm1 + vpmaxsd (%r10,%rax,4),%ymm6,%ymm0 + vmovdqu (%r14,%rax,4),%ymm6 + vpminsd (%r8,%rax,4),%ymm6,%ymm5 + vpmaxsd (%r8,%rax,4),%ymm6,%ymm3 + vmovdqu (%rsi,%rax,4),%ymm6 + vpminsd (%rdi,%rax,4),%ymm6,%ymm6 + vpminsd %ymm5,%ymm7,%ymm9 + vmovdqu (%rsi,%rax,4),%ymm2 + vpmaxsd %ymm5,%ymm7,%ymm5 + mov -176(%rbp),%rdx + vpminsd %ymm3,%ymm4,%ymm8 + vpminsd %ymm6,%ymm1,%ymm7 + vpmaxsd %ymm3,%ymm4,%ymm3 + vpminsd %ymm7,%ymm9,%ymm10 + vpmaxsd %ymm7,%ymm9,%ymm4 + vpmaxsd (%rdi,%rax,4),%ymm2,%ymm2 + vpmaxsd %ymm6,%ymm1,%ymm1 + vmovdqu %ymm10,(%r12,%rax,4) + vmovdqu %ymm4,(%rdx,%rax,4) + mov -168(%rbp),%rdx + vpminsd %ymm1,%ymm5,%ymm9 + vpmaxsd %ymm1,%ymm5,%ymm1 + vpminsd %ymm2,%ymm0,%ymm6 + vpmaxsd %ymm2,%ymm0,%ymm0 + vmovdqu %ymm9,(%rdx,%rax,4) + vpminsd %ymm6,%ymm8,%ymm7 + vpmaxsd %ymm6,%ymm8,%ymm2 + mov -160(%rbp),%rdx + vpminsd %ymm0,%ymm3,%ymm5 + vpmaxsd %ymm0,%ymm3,%ymm3 + vmovdqu %ymm1,(%rdx,%rax,4) + mov -200(%rbp),%rdx + vmovdqu %ymm7,(%r11,%rax,4) + vmovdqu %ymm2,(%rdx,%rax,4) + mov -144(%rbp),%rdx + vmovdqu %ymm5,(%rdx,%rax,4) + mov -192(%rbp),%rdx + vmovdqu %ymm3,(%rdx,%rax,4) + add $8,%rax + jmp .L92 +.L143: mov -184(%rbp),%rdx + add %rdx,-136(%rbp) + jmp .L90 +.L145: sarq $2,-112(%rbp) +.L94: cmp $15,-112(%rbp) + jle .L144 + mov -112(%rbp),%rcx + xor %esi,%esi + sarq %rcx + imul $-8,%rcx,%rdi + lea 0(,%rcx,4),%rdx + lea (%r12,%rdx),%r11 + lea (%r11,%rdx),%r10 + lea (%r10,%rdx),%r8 + add %r8,%rdi + lea (%rdi,%rdx),%r9 + lea (%r9,%rdx),%rbx +.L95: cmp %r15,%rsi + jge .L145 + mov %rsi,%rax +.L97: cmp %rcx,%rax + jge .L146 + vmovdqu (%r12,%rax,4),%ymm5 + vpminsd (%r10,%rax,4),%ymm5,%ymm2 + vpmaxsd 
(%r10,%rax,4),%ymm5,%ymm0 + vmovdqu (%r11,%rax,4),%ymm5 + vpminsd (%r8,%rax,4),%ymm5,%ymm3 + vpmaxsd (%r8,%rax,4),%ymm5,%ymm1 + vpminsd %ymm3,%ymm2,%ymm4 + vpmaxsd %ymm3,%ymm2,%ymm2 + vpminsd %ymm1,%ymm0,%ymm3 + vpmaxsd %ymm1,%ymm0,%ymm0 + vmovdqu %ymm4,(%r12,%rax,4) + vmovdqu %ymm2,(%rdi,%rax,4) + vmovdqu %ymm3,(%r9,%rax,4) + vmovdqu %ymm0,(%rbx,%rax,4) + add $8,%rax + jmp .L97 +.L146: add %rdx,%rsi + add %rdx,%rcx + jmp .L95 +.L144: cmp $8,-112(%rbp) + je .L111 +.L102: mov -152(%rbp),%rdx + add -128(%rbp),%rdx + xor %ecx,%ecx + vpcmpeqd %ymm6,%ymm6,%ymm6 + lea (%rdx,%r13),%r10 + lea (%r10,%r13),%r9 + lea (%r9,%r13),%r8 + lea (%r8,%r13),%rdi + lea (%rdi,%r13),%rsi + lea (%rsi,%r13),%rax + jmp .L100 +.L111: xor %eax,%eax +.L99: cmp %r15,%rax + jge .L102 + vmovdqu (%r12,%rax,4),%ymm5 + vpminsd 32(%r12,%rax,4),%ymm5,%ymm1 + vpmaxsd 32(%r12,%rax,4),%ymm5,%ymm0 + vmovdqu %ymm1,(%r12,%rax,4) + vmovdqu %ymm0,32(%r12,%rax,4) + add $16,%rax + jmp .L99 +.L104: vmovdqu (%r10),%ymm7 + vmovdqu (%r12,%rcx,4),%ymm4 + vpminsd (%r9),%ymm7,%ymm3 + vpminsd (%rdx),%ymm4,%ymm5 + vpmaxsd (%r9),%ymm7,%ymm2 + vpmaxsd (%rdx),%ymm4,%ymm4 + vmovdqu (%r8),%ymm7 + vmovdqu (%rsi),%ymm14 + vpminsd %ymm3,%ymm5,%ymm11 + vpmaxsd %ymm3,%ymm5,%ymm3 + vpminsd (%rdi),%ymm7,%ymm1 + vpminsd %ymm2,%ymm4,%ymm10 + cmp $0,-116(%rbp) + vpmaxsd (%rdi),%ymm7,%ymm0 + vmovdqu (%rsi),%ymm7 + vpmaxsd %ymm2,%ymm4,%ymm2 + vpminsd (%rax),%ymm7,%ymm7 + vpmaxsd (%rax),%ymm14,%ymm9 + vpminsd %ymm7,%ymm1,%ymm8 + vpmaxsd %ymm7,%ymm1,%ymm1 + vpminsd %ymm9,%ymm0,%ymm7 + vpmaxsd %ymm9,%ymm0,%ymm0 + vpminsd %ymm8,%ymm11,%ymm5 + vpminsd %ymm1,%ymm3,%ymm9 + vpminsd %ymm7,%ymm10,%ymm12 + vpmaxsd %ymm1,%ymm3,%ymm3 + vpminsd %ymm0,%ymm2,%ymm4 + vpmaxsd %ymm8,%ymm11,%ymm8 + vpmaxsd %ymm0,%ymm2,%ymm2 + vpmaxsd %ymm7,%ymm10,%ymm7 + vpunpckldq %ymm8,%ymm5,%ymm11 + vpunpckldq %ymm7,%ymm12,%ymm10 + vpunpckhdq %ymm8,%ymm5,%ymm8 + vpunpckhdq %ymm7,%ymm12,%ymm7 + vpunpckhdq %ymm3,%ymm9,%ymm5 + vpunpckldq %ymm2,%ymm4,%ymm1 + vpunpckldq 
%ymm3,%ymm9,%ymm0 + vpunpckhdq %ymm2,%ymm4,%ymm4 + vpunpcklqdq %ymm0,%ymm11,%ymm3 + vpunpckhqdq %ymm0,%ymm11,%ymm9 + vpunpcklqdq %ymm5,%ymm8,%ymm2 + vpunpcklqdq %ymm4,%ymm7,%ymm11 + vpunpckhqdq %ymm5,%ymm8,%ymm5 + vpunpcklqdq %ymm1,%ymm10,%ymm12 + vpunpckhqdq %ymm4,%ymm7,%ymm0 + vpunpckhqdq %ymm1,%ymm10,%ymm1 + vperm2i128 $32,%ymm11,%ymm2,%ymm8 + vperm2i128 $32,%ymm12,%ymm3,%ymm10 + vperm2i128 $32,%ymm1,%ymm9,%ymm7 + vperm2i128 $32,%ymm0,%ymm5,%ymm4 + vperm2i128 $49,%ymm12,%ymm3,%ymm3 + vperm2i128 $49,%ymm11,%ymm2,%ymm2 + vperm2i128 $49,%ymm1,%ymm9,%ymm1 + vperm2i128 $49,%ymm0,%ymm5,%ymm0 + je .L103 + vpxor %ymm6,%ymm10,%ymm10 + vpxor %ymm6,%ymm8,%ymm8 + vpxor %ymm6,%ymm7,%ymm7 + vpxor %ymm6,%ymm4,%ymm4 + vpxor %ymm6,%ymm3,%ymm3 + vpxor %ymm6,%ymm2,%ymm2 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm6,%ymm0,%ymm0 +.L103: add $32,%rdx + add $32,%r10 + add $32,%r9 + add $32,%r8 + vmovdqu %ymm10,(%r12,%rcx,4) + add $32,%rdi + add $8,%rcx + add $32,%rsi + vmovdqu %ymm3,-32(%rdx) + add $32,%rax + vmovdqu %ymm8,-32(%r10) + vmovdqu %ymm2,-32(%r9) + vmovdqu %ymm7,-32(%r8) + vmovdqu %ymm1,-32(%rdi) + vmovdqu %ymm4,-32(%rsi) + vmovdqu %ymm0,-32(%rax) +.L100: cmp -80(%rbp),%rcx + jl .L104 +.L35: add $264,%rsp + pop %rbx + pop %r12 + pop %r13 + pop %r14 + pop %r15 + pop %rbp + lea -16(%r13),%rsp + pop %r13 + ret + .endfn int32_sort_2power .rodata.cst32 .LC0: .quad -1,0,-1,0 .LC1: .quad 0,-1,-1,0 .LC2: .quad -1,-1,0,0 .LC3: .quad -4294967296,4294967295,-4294967296,4294967295 -.LC4: .quad 0x7fffffff7fffffff,0x7fffffff7fffffff - .quad 0x7fffffff7fffffff,0x7fffffff7fffffff +.LC4: .quad 0x7fffffff7fffffff + .quad 0x7fffffff7fffffff + .quad 0x7fffffff7fffffff + .quad 0x7fffffff7fffffff diff --git a/libc/nexgen32e/gc.h b/libc/nexgen32e/gc.h index b3b3b443..69559f31 100644 --- a/libc/nexgen32e/gc.h +++ b/libc/nexgen32e/gc.h @@ -1,10 +1,9 @@ #ifndef COSMOPOLITAN_LIBC_NEXGEN32E_GC_H_ #define COSMOPOLITAN_LIBC_NEXGEN32E_GC_H_ +#include "libc/nexgen32e/stackframe.h" #if !(__ASSEMBLER__ + 
__LINKER__ + 0) COSMOPOLITAN_C_START_ -struct StackFrame; - struct Garbages { size_t i, n; struct Garbage { diff --git a/libc/nexgen32e/memcpy.S b/libc/nexgen32e/memcpy.S index ec435c19..e50c6e4d 100644 --- a/libc/nexgen32e/memcpy.S +++ b/libc/nexgen32e/memcpy.S @@ -25,9 +25,8 @@ world's most popular function──one all programmers love. This implementation is the fastest and nearly the tiniest too. - It doesn't clobber general registers. It won't break down on old - computers or misaligned data. It's so easy that even a child - could use it──and they do. + It doesn't break when copying backwards or on misaligned data. + It's so easy that even a child could use it, and they do. */ #include "libc/nexgen32e/x86feature.h" #include "libc/macros.h" @@ -53,11 +52,10 @@ memcpy: mov %rdi,%rax / @param rdi is dest / @param rsi is src / @param rdx is number of bytes -/ @clob flags,xmm3,xmm4 +/ @clob flags,rcx,xmm3,xmm4 / @mode long .align 16 MemCpy: .leafprologue - push %rcx mov $.Lmemcpytab.ro.size,%ecx cmp %rcx,%rdx cmovb %rdx,%rcx @@ -95,8 +93,7 @@ MemCpy: .leafprologue mov %rcx,(%rdi) mov %rbx,-8(%rdi,%rdx) 1: pop %rbx -.L0: pop %rcx - .leafepilogue +.L0: .leafepilogue .L4: push %rbx mov (%rsi),%ecx mov -4(%rsi,%rdx),%ebx diff --git a/libc/nexgen32e/memmove.S b/libc/nexgen32e/memmove.S index 3efe08d3..13a0c703 100644 --- a/libc/nexgen32e/memmove.S +++ b/libc/nexgen32e/memmove.S @@ -27,6 +27,7 @@ / @param rsi is src / @param rdx is number of bytes / @return original rdi copied to rax +/ @clob flags,rcx / @asyncsignalsafe memmove: mov %rdi,%rax @@ -36,7 +37,6 @@ memmove: MemMove: .leafprologue .profilable - push %rcx push %rdi push %rsi mov %rdx,%rcx @@ -49,7 +49,6 @@ MemMove: cld pop %rsi pop %rdi - pop %rcx .leafepilogue .endfn memmove,globl .source __FILE__ diff --git a/libc/nexgen32e/memset.S b/libc/nexgen32e/memset.S index a04b5392..ad31d05d 100644 --- a/libc/nexgen32e/memset.S +++ b/libc/nexgen32e/memset.S @@ -43,12 +43,11 @@ memset: mov %rdi,%rax / @param rdi is dest 
/ @param esi is the byte to set / @param edx is the number of bytes to set -/ @clob flags,xmm3 +/ @clob flags,rcx,xmm3 / @mode long MemSet: .leafprologue .profilable push %rbx - push %rcx movd %esi,%xmm3 mov $.Lmemsettab.ro.size,%ecx cmp %rcx,%rdx @@ -77,8 +76,7 @@ MemSet: .leafprologue ja 1b movdqu %xmm3,-16(%rdi,%rdx) pxor %xmm3,%xmm3 -.L0: pop %rcx - pop %rbx +.L0: pop %rbx .leafepilogue .L8: movzbq %sil,%rbx mov $0x0101010101010101,%rcx diff --git a/libc/nexgen32e/stackframe.h b/libc/nexgen32e/stackframe.h new file mode 100644 index 00000000..24435f76 --- /dev/null +++ b/libc/nexgen32e/stackframe.h @@ -0,0 +1,13 @@ +#ifndef COSMOPOLITAN_LIBC_NEXGEN32E_STACKFRAME_H_ +#define COSMOPOLITAN_LIBC_NEXGEN32E_STACKFRAME_H_ +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +struct StackFrame { + struct StackFrame *next; + intptr_t addr; +}; + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_LIBC_NEXGEN32E_STACKFRAME_H_ */ diff --git a/libc/runtime/construct.S b/libc/runtime/construct.S index c64f54c6..2614773e 100644 --- a/libc/runtime/construct.S +++ b/libc/runtime/construct.S @@ -40,6 +40,10 @@ _construct: je 2f push %rax push %rcx + mov %r12,%rdi + mov %r13,%rsi + mov %r14,%rdx + mov %r15,%rcx call *(%rax) pop %rcx pop %rax diff --git a/tool/viz/ycbcrio.c b/libc/runtime/directmap.c similarity index 58% rename from tool/viz/ycbcrio.c rename to libc/runtime/directmap.c index 5d3a4480..18b034d5 100644 --- a/tool/viz/ycbcrio.c +++ b/libc/runtime/directmap.c @@ -17,59 +17,39 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "dsp/mpeg/ycbcrio.h" -#include "libc/fmt/fmt.h" -#include "libc/log/check.h" -#include "libc/mem/mem.h" -#include "libc/runtime/runtime.h" -#include "libc/stdio/stdio.h" -#include "libc/str/str.h" -#include "libc/sysv/consts/ex.h" -#include "libc/sysv/consts/exit.h" 
-#include "third_party/getopt/getopt.h" +#include "libc/calls/internal.h" +#include "libc/nt/memory.h" +#include "libc/nt/runtime.h" +#include "libc/runtime/directmap.h" -#define USAGE \ - " [FLAGS] [PATH...]\n\ -\n\ -Flags:\n\ - -h shows this information\n\ -\n" - -static char *inpath_; - -static void PrintUsage(int rc, FILE *f) { - fputs("Usage: ", f); - fputs(program_invocation_name, f); - fputs(USAGE, f); - exit(rc); -} - -static void GetOpts(int *argc, char *argv[]) { - int opt; - while ((opt = getopt(*argc, argv, "?h")) != -1) { - switch (opt) { - case '?': - case 'h': - PrintUsage(EXIT_SUCCESS, stdout); - default: - PrintUsage(EX_USAGE, stderr); +static textwindows struct DirectMap DirectMapNt(void *addr, size_t size, + unsigned prot, unsigned flags, + int fd, int64_t off) { + struct DirectMap res; + if ((res.maphandle = CreateFileMappingNuma( + fd != -1 ? g_fds.p[fd].handle : kNtInvalidHandleValue, + &kNtIsInheritable, prot2nt(prot, flags), size >> 32, size, NULL, + kNtNumaNoPreferredNode))) { + if (!(res.addr = MapViewOfFileExNuma(res.maphandle, fprot2nt(prot, flags), + off >> 32, off, size, addr, + kNtNumaNoPreferredNode))) { + CloseHandle(res.maphandle); + res.maphandle = kNtInvalidHandleValue; + res.addr = (void *)(intptr_t)winerr(); } + } else { + res.maphandle = kNtInvalidHandleValue; + res.addr = (void *)(intptr_t)winerr(); } + return res; } -static void ProcessFile(struct Ycbcrio *m) { - /* m->frame-> */ -} - -int main(int argc, char *argv[]) { - size_t i; - struct Ycbcrio *m; - GetOpts(&argc, argv); - for (i = optind; i < argc; ++i) { - inpath_ = argv[i]; - m = YcbcrioOpen(inpath_, NULL); - ProcessFile(m); - YcbcrioClose(&m); +struct DirectMap DirectMap(void *addr, size_t size, unsigned prot, + unsigned flags, int fd, int64_t off) { + if (!IsWindows()) { + return (struct DirectMap){mmap$sysv(addr, size, prot, flags, fd, off), + kNtInvalidHandleValue}; + } else { + return DirectMapNt(addr, size, prot, flags, fd, off); } - return 0; } diff --git 
a/libc/runtime/directmap.h b/libc/runtime/directmap.h new file mode 100644 index 00000000..204838b9 --- /dev/null +++ b/libc/runtime/directmap.h @@ -0,0 +1,15 @@ +#ifndef COSMOPOLITAN_LIBC_RUNTIME_DIRECTMAP_H_ +#define COSMOPOLITAN_LIBC_RUNTIME_DIRECTMAP_H_ +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +struct DirectMap { + void *addr; + int64_t maphandle; +}; + +struct DirectMap DirectMap(void *, size_t, unsigned, unsigned, int, int64_t); + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_LIBC_RUNTIME_DIRECTMAP_H_ */ diff --git a/libc/runtime/ezmap.c b/libc/runtime/ezmap.c index 311bee8b..c5331922 100644 --- a/libc/runtime/ezmap.c +++ b/libc/runtime/ezmap.c @@ -17,7 +17,6 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/pushpop.h" #include "libc/bits/safemacros.h" #include "libc/calls/calls.h" #include "libc/limits.h" @@ -51,8 +50,14 @@ int mapfileread(const char *filename, struct MappedFile *mf) { int unmapfile(struct MappedFile *mf) { int rc; rc = 0; - rc |= munmap_s(&mf->addr, mf->size); - rc |= close_s(&mf->fd); - pushmov(&mf->size, 0); + if (mf->addr != MAP_FAILED) { + rc |= munmap(mf->addr, mf->size); + mf->addr = MAP_FAILED; + } + if (mf->fd != -1) { + rc |= close(mf->fd); + mf->fd = -1; + } + mf->size = 0; return rc; } diff --git a/libc/runtime/ftrace.greg.c b/libc/runtime/ftrace.greg.c index 79a9e660..0ce88056 100644 --- a/libc/runtime/ftrace.greg.c +++ b/libc/runtime/ftrace.greg.c @@ -25,6 +25,7 @@ #include "libc/calls/struct/sigset.h" #include "libc/dce.h" #include "libc/macros.h" +#include "libc/nexgen32e/stackframe.h" #include "libc/nt/files.h" #include "libc/nt/runtime.h" #include "libc/nt/thunk/msabi.h" diff --git a/libc/runtime/gc.h b/libc/runtime/gc.h index 9c28da2d..ef8bee5a 100644 --- a/libc/runtime/gc.h +++ b/libc/runtime/gc.h @@ -1,6 +1,7 @@ 
#ifndef COSMOPOLITAN_LIBC_RUNTIME_GC_H_ #define COSMOPOLITAN_LIBC_RUNTIME_GC_H_ #include "libc/calls/calls.h" +#include "libc/nexgen32e/stackframe.h" #include "libc/runtime/runtime.h" #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ @@ -13,8 +14,6 @@ COSMOPOLITAN_C_START_ * using very few lines of code. */ -struct StackFrame; - /** * Releases resource when function returns. * diff --git a/libc/runtime/grow.c b/libc/runtime/grow.c index 0ae68588..6f77227f 100644 --- a/libc/runtime/grow.c +++ b/libc/runtime/grow.c @@ -19,10 +19,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" #include "libc/bits/bits.h" -#include "libc/bits/safemacros.h" #include "libc/bits/weaken.h" #include "libc/conv/conv.h" -#include "libc/conv/sizemultiply.h" #include "libc/macros.h" #include "libc/mem/mem.h" #include "libc/runtime/runtime.h" @@ -70,8 +68,9 @@ bool grow(void *pp, size_t *capacity, size_t itemsize, size_t extra) { p1 = isheap(*p) ? *p : NULL; p2 = NULL; n1 = *capacity; - n2 = (*p ? n1 + (n1 >> 1) : max(4, INITIAL_CAPACITY / itemsize)) + extra; - if (sizemultiply(&t1, n1, itemsize) && sizemultiply(&t2, n2, itemsize)) { + n2 = (*p ? 
n1 + (n1 >> 1) : MAX(4, INITIAL_CAPACITY / itemsize)) + extra; + if (!__builtin_mul_overflow(n1, itemsize, &t1) && + !__builtin_mul_overflow(n2, itemsize, &t2)) { if (weaken(realloc) && (p2 = weaken(realloc)(p1, ROUNDUP(t2, 32)))) { if (!p1 && *p) memcpy(p2, *p, t1); memset((char *)p2 + t1, 0, t2 - t1); diff --git a/libc/runtime/internal.h b/libc/runtime/internal.h index 48674264..22f4699a 100644 --- a/libc/runtime/internal.h +++ b/libc/runtime/internal.h @@ -14,6 +14,7 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ +hidden extern bool _mmap_asan_mode; hidden extern char **g_freebsdhint; hidden extern unsigned g_runstate; hidden extern void *g_stacktop; diff --git a/libc/runtime/memtrack.h b/libc/runtime/memtrack.h index 8f233e50..d2c76c43 100644 --- a/libc/runtime/memtrack.h +++ b/libc/runtime/memtrack.h @@ -1,13 +1,11 @@ #ifndef COSMOPOLITAN_LIBC_RUNTIME_MEMTRACK_H_ #define COSMOPOLITAN_LIBC_RUNTIME_MEMTRACK_H_ -#include "libc/nexgen32e/vendor.h" #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ -#define kMappingsSize 0x0000100000000000 /* 16TB */ -#define kMappingsStart (IsGenuineCosmo() ? 
0x300000000000 : 0x200000000000) -#define kFixedMappingsStart 0x0000100000000000 -#define kFixedMappingsSize kMappingsSize +#define kAutomapStart 0x0000100000000000 +#define kAutomapSize 0x0000100000000000 +#define kFixedmapStart 0x0000200000000000 struct MemoryIntervals { int i; @@ -27,6 +25,7 @@ int TrackMemoryInterval(struct MemoryIntervals *, int, int, long); int ReleaseMemoryIntervals(struct MemoryIntervals *, int, int, void (*)(struct MemoryIntervals *, int, int)); void ReleaseMemoryNt(struct MemoryIntervals *, int, int); +int UntrackMemoryIntervals(void *, size_t); COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/runtime/mmap.c b/libc/runtime/mmap.c index 357401ea..0de46299 100644 --- a/libc/runtime/mmap.c +++ b/libc/runtime/mmap.c @@ -18,13 +18,14 @@ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" +#include "libc/bits/weaken.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/dce.h" +#include "libc/log/asan.h" #include "libc/macros.h" -#include "libc/nt/memory.h" -#include "libc/nt/runtime.h" #include "libc/rand/rand.h" +#include "libc/runtime/directmap.h" #include "libc/runtime/memtrack.h" #include "libc/runtime/runtime.h" #include "libc/str/str.h" @@ -32,79 +33,14 @@ #include "libc/sysv/consts/prot.h" #include "libc/sysv/errfuns.h" -#define IP(X) (intptr_t)(X) -#define VIP(X) (void *)IP(X) -#define COORD(a) (int)(IP(a) >> 16) -#define ADDR(c) (void *)(IP(c) << 16) -#define ALIGNED(p) (!(IP(p) & (FRAMESIZE - 1))) -#define CANONICAL(p) (-0x800000000000 <= IP(p) && IP(p) <= 0x7fffffffffff) -#define LAST_COORD(a, n) (COORD(a) + (ROUNDUP(n, FRAMESIZE) >> 16) - 1) - -struct DirectMap { - void *addr; - int64_t maphandle; -}; +#define IP(X) (intptr_t)(X) +#define VIP(X) (void *)IP(X) +#define ADDR(c) (void *)(IP(c) << 16) +#define ALIGNED(p) (!(IP(p) & (FRAMESIZE - 1))) +#define CANONICAL(p) (-0x800000000000 <= IP(p) && 
IP(p) <= 0x7fffffffffff) struct MemoryIntervals _mmi; -static textwindows struct DirectMap DirectMapNt(void *addr, size_t size, - unsigned prot, unsigned flags, - int fd, int64_t off) { - struct DirectMap res; /* NT IS TORTURE */ - if ((res.maphandle = CreateFileMappingNuma( - fd != -1 ? g_fds.p[fd].handle : kNtInvalidHandleValue, - &kNtIsInheritable, prot2nt(prot, flags), size >> 32, size, NULL, - kNtNumaNoPreferredNode))) { - if (!(res.addr = MapViewOfFileExNuma(res.maphandle, fprot2nt(prot, flags), - off >> 32, off, size, addr, - kNtNumaNoPreferredNode))) { - CloseHandle(res.maphandle); - res.maphandle = kNtInvalidHandleValue; - res.addr = VIP(winerr()); - } - } else { - res.maphandle = kNtInvalidHandleValue; - res.addr = VIP(winerr()); - } - return res; -} - -static struct DirectMap DirectMap(void *addr, size_t size, unsigned prot, - unsigned flags, int fd, int64_t off) { - if (!IsWindows()) { - return (struct DirectMap){mmap$sysv(addr, size, prot, flags, fd, off), - kNtInvalidHandleValue}; - } else { - return DirectMapNt(addr, size, prot, flags, fd, off); - } -} - -static int UntrackMemoryIntervals(void *addr, size_t size) { - return ReleaseMemoryIntervals(&_mmi, COORD(addr), LAST_COORD(addr, size), - ReleaseMemoryNt); -} - -/** - * Releases memory pages. - * - * @param addr is a pointer within any memory mapped region the process - * has permission to control, such as address ranges returned by - * mmap(), the program image itself, etc. 
- * @param size is the amount of memory to unmap, which needn't be a - * multiple of FRAMESIZE, and may be a subset of that which was - * mapped previously, and may punch holes in existing mappings, - * but your mileage may vary on windows - * @return 0 on success, or -1 w/ errno - */ -int munmap(void *addr, size_t size) { - int rc; - if (!ALIGNED(addr) || !CANONICAL(addr) || !size) return einval(); - size = ROUNDUP(size, FRAMESIZE); - if (UntrackMemoryIntervals(addr, size) == -1) return -1; - if (IsWindows()) return 0; - return munmap$sysv(addr, size); -} - /** * Beseeches system for page-table entries. * @@ -120,8 +56,7 @@ int munmap(void *addr, size_t size) { * @return virtual base address of new mapping, or MAP_FAILED w/ errno */ void *mmap(void *addr, size_t size, int prot, int flags, int fd, int64_t off) { - int i; - long gap; + int i, x, n, a, b; struct DirectMap dm; if (!size) return VIP(einval()); if (!ALIGNED(off)) return VIP(einval()); @@ -134,24 +69,18 @@ void *mmap(void *addr, size_t size, int prot, int flags, int fd, int64_t off) { if (UntrackMemoryIntervals(addr, size) == -1) { return MAP_FAILED; } - } else if (_mmi.i) { - if (0 && IsModeDbg()) { - addr = VIP(rand64() & 0x00007ffffffff000); - } else { - for (i = _mmi.i - 1; i > 0; --i) { - gap = _mmi.p[i].x - _mmi.p[i - 1].y - 1; - assert(gap > 0); - if (gap >= (ROUNDUP(size, FRAMESIZE) >> 16)) { - addr = ADDR(_mmi.p[i - 1].y + 1); - break; - } - } - if (!addr) { - addr = ADDR(_mmi.p[_mmi.i - 1].y + 1); - } - } } else { - addr = VIP(kMappingsStart); + x = kAutomapStart >> 16; + n = ROUNDUP(size, FRAMESIZE) >> 16; + for (i = 0; i < _mmi.i; ++i) { + if (_mmi.p[i].y < x) continue; + if (_mmi.p[i].x > x + n - 1) break; + x = _mmi.p[i].y + 1; + } + if (x + n - 1 >= ((kAutomapStart + kAutomapSize) >> 16)) { + return (void *)(intptr_t)enomem(); + } + addr = (void *)(intptr_t)((int64_t)x << 16); } assert((flags & MAP_FIXED) || (!isheap(addr) && !isheap((char *)addr + size - 1))); @@ -159,9 +88,13 @@ void 
*mmap(void *addr, size_t size, int prot, int flags, int fd, int64_t off) { if (dm.addr == MAP_FAILED || dm.addr != addr) { return MAP_FAILED; } - if (TrackMemoryInterval(&_mmi, COORD(dm.addr), LAST_COORD(dm.addr, size), - dm.maphandle) == -1) { - _Exit(1); + a = ROUNDDOWN((intptr_t)addr, FRAMESIZE) >> 16; + b = ROUNDDOWN((intptr_t)addr + size - 1, FRAMESIZE) >> 16; + if (TrackMemoryInterval(&_mmi, a, b, dm.maphandle) == -1) { + abort(); + } + if (weaken(__asan_map_shadow)) { + weaken(__asan_map_shadow)(dm.addr, size); } return dm.addr; } diff --git a/libc/runtime/munmap_s.c b/libc/runtime/munmap.c similarity index 65% rename from libc/runtime/munmap_s.c rename to libc/runtime/munmap.c index 7fc1d6ae..1e082f3e 100644 --- a/libc/runtime/munmap_s.c +++ b/libc/runtime/munmap.c @@ -17,21 +17,34 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/bits.h" -#include "libc/bits/pushpop.h" -#include "libc/calls/calls.h" +#include "libc/calls/internal.h" +#include "libc/dce.h" +#include "libc/macros.h" +#include "libc/runtime/memtrack.h" #include "libc/runtime/runtime.h" +#include "libc/sysv/errfuns.h" + +#define IP(X) (intptr_t)(X) +#define ALIGNED(p) (!(IP(p) & (FRAMESIZE - 1))) +#define CANONICAL(p) (-0x800000000000 <= IP(p) && IP(p) <= 0x7fffffffffff) /** - * Closes memory mapping. - * - * The caller's address holder is set to MAP_FAILED (-1) which is a - * no-op for subsequent invocations. + * Releases memory pages. * + * @param addr is a pointer within any memory mapped region the process + * has permission to control, such as address ranges returned by + * mmap(), the program image itself, etc. 
+ * @param size is the amount of memory to unmap, which needn't be a + * multiple of FRAMESIZE, and may be a subset of that which was + * mapped previously, and may punch holes in existing mappings, + * but your mileage may vary on windows * @return 0 on success, or -1 w/ errno */ -int munmap_s(void *addrp, uint64_t size) { - void **addrpp = (void **)addrp; - void *addr = (void *)pushpop(-1L); - return munmap(lockxchg(addrpp, &addr), size); +int munmap(void *addr, size_t size) { + int rc; + if (!ALIGNED(addr) || !CANONICAL(addr) || !size) return einval(); + size = ROUNDUP(size, FRAMESIZE); + if (UntrackMemoryIntervals(addr, size) == -1) return -1; + if (IsWindows()) return 0; + return munmap$sysv(addr, size); } diff --git a/libc/runtime/ringalloc.c b/libc/runtime/ringalloc.c index ade57207..5e1423da 100644 --- a/libc/runtime/ringalloc.c +++ b/libc/runtime/ringalloc.c @@ -23,6 +23,7 @@ #include "libc/limits.h" #include "libc/macros.h" #include "libc/runtime/ring.h" +#include "libc/runtime/runtime.h" #include "libc/str/str.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/prot.h" diff --git a/libc/runtime/ringfree.c b/libc/runtime/ringfree.c index 22666052..8b968108 100644 --- a/libc/runtime/ringfree.c +++ b/libc/runtime/ringfree.c @@ -17,8 +17,8 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/calls/calls.h" #include "libc/runtime/ring.h" +#include "libc/runtime/runtime.h" /** * Frees ring buffer. 
diff --git a/libc/runtime/runtime.h b/libc/runtime/runtime.h index 2dcd9846..da3919ee 100644 --- a/libc/runtime/runtime.h +++ b/libc/runtime/runtime.h @@ -6,13 +6,6 @@ COSMOPOLITAN_C_START_ │ cosmopolitan § runtime ─╬─│┼ ╚────────────────────────────────────────────────────────────────────────────│*/ -struct SymbolTable; - -struct StackFrame { - struct StackFrame *next; - intptr_t addr; -}; - typedef long jmp_buf[8] aligned(CACHELINE); extern int g_argc; /* CRT */ @@ -71,8 +64,12 @@ void loadxmm(void *); void peekall(void); int issetugid(void); void weakfree(void *) libcesque; -void __hook(void (*)(void), struct SymbolTable *); bool isheap(void *); +void *mmap(void *, uint64_t, int32_t, int32_t, int32_t, int64_t); +void *mremap(void *, uint64_t, uint64_t, int32_t, void *); +int munmap(void *, uint64_t); +int mprotect(void *, uint64_t, int) privileged; +int msync(void *, size_t, int); /*───────────────────────────────────────────────────────────────────────────│─╗ │ cosmopolitan § runtime » optimizations ─╬─│┼ diff --git a/libc/runtime/runtime.mk b/libc/runtime/runtime.mk index 497a5006..0625f4b0 100644 --- a/libc/runtime/runtime.mk +++ b/libc/runtime/runtime.mk @@ -60,13 +60,8 @@ $(LIBC_RUNTIME_A).pkg: \ $(LIBC_RUNTIME_A_OBJS) \ $(foreach x,$(LIBC_RUNTIME_A_DIRECTDEPS),$($(x)_A).pkg) -o/$(MODE)/libc/runtime/asan.greg.o \ -o/$(MODE)/libc/runtime/shadowargs.o \ -o/$(MODE)/libc/runtime/hook.greg.o \ -o/$(MODE)/libc/runtime/ftrace.greg.o \ -o/$(MODE)/libc/runtime/__stack_chk_fail.o \ -o/$(MODE)/libc/runtime/__stack_chk_guard.o: \ - OVERRIDE_COPTS += \ +$(LIBC_RUNTIME_A_OBJS): \ + OVERRIDE_CFLAGS += \ $(NO_MAGIC) # @see ape/ape.s for tuning parameters that make this safe diff --git a/libc/runtime/symbols.h b/libc/runtime/symbols.h index 552ba5b0..9e9211e9 100644 --- a/libc/runtime/symbols.h +++ b/libc/runtime/symbols.h @@ -53,6 +53,7 @@ struct SymbolTable *opensymboltable(const char *) nodiscard; int closesymboltable(struct SymbolTable **); const struct Symbol 
*bisectsymbol(struct SymbolTable *, intptr_t, int64_t *); const char *getsymbolname(struct SymbolTable *, const struct Symbol *); +void __hook(void (*)(void), struct SymbolTable *); COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/tinymath/delegates/powi.c b/libc/runtime/untrackmemoryintervals.c similarity index 86% rename from libc/tinymath/delegates/powi.c rename to libc/runtime/untrackmemoryintervals.c index b2a09561..c45d0b9e 100644 --- a/libc/tinymath/delegates/powi.c +++ b/libc/runtime/untrackmemoryintervals.c @@ -17,8 +17,12 @@ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/tinymath.h" +#include "libc/macros.h" +#include "libc/runtime/memtrack.h" -double(powi)(double a, int b) { - return tinymath_powl(a, b); +int UntrackMemoryIntervals(void *addr, size_t size) { + int a, b; + a = ROUNDDOWN((intptr_t)addr, FRAMESIZE) >> 16; + b = ROUNDDOWN((intptr_t)addr + size - 1, FRAMESIZE) >> 16; + return ReleaseMemoryIntervals(&_mmi, a, b, ReleaseMemoryNt); } diff --git a/libc/stdio/stdio.mk b/libc/stdio/stdio.mk index 2e62bdae..b8c42622 100644 --- a/libc/stdio/stdio.mk +++ b/libc/stdio/stdio.mk @@ -53,9 +53,9 @@ $(LIBC_STDIO_A).pkg: \ $(LIBC_STDIO_A_OBJS) \ $(foreach x,$(LIBC_STDIO_A_DIRECTDEPS),$($(x)_A).pkg) -#o/$(MODE)/libc/stdio/fputc.o: \ +o/$(MODE)/libc/stdio/fputc.o: \ OVERRIDE_CFLAGS += \ - $(NO_MAGIC) + -O3 LIBC_STDIO_LIBS = $(foreach x,$(LIBC_STDIO_ARTIFACTS),$($(x))) LIBC_STDIO_SRCS = $(foreach x,$(LIBC_STDIO_ARTIFACTS),$($(x)_SRCS)) diff --git a/libc/str/str.h b/libc/str/str.h index 37babf1b..25987c72 100644 --- a/libc/str/str.h +++ b/libc/str/str.h @@ -440,6 +440,12 @@ char *_strncpy(char *, const char *, size_t) asm("strncpy") memcpyesque; #define memmove(DEST, SRC, SIZE) __memcpy("MemMove", (DEST), (SRC), (SIZE)) +#define mempcpy(DEST, SRC, SIZE) \ + ({ \ + size_t SIze = (SIZE); 
\ + (void *)((char *)memcpy((DEST), (SRC), SIze) + SIze); \ + }) + #define __memcpy(FN, DEST, SRC, SIZE) \ ({ \ void *DeSt = (DEST); \ @@ -448,16 +454,10 @@ char *_strncpy(char *, const char *, size_t) asm("strncpy") memcpyesque; asm("call\t" FN \ : "=m"(*(char(*)[SiZe])(DeSt)) \ : "D"(DeSt), "S"(SrC), "d"(SiZe), "m"(*(const char(*)[SiZe])(SrC)) \ - : __STR_XMM_CLOBBER "cc"); \ + : __STR_XMM_CLOBBER "rcx", "cc"); \ DeSt; \ }) -#define mempcpy(DEST, SRC, SIZE) \ - ({ \ - size_t SIze = (SIZE); \ - (void *)((char *)memcpy((DEST), (SRC), SIze) + SIze); \ - }) - #define __memset(DEST, BYTE, SIZE) \ ({ \ void *DeSt = (DEST); \ @@ -465,7 +465,7 @@ char *_strncpy(char *, const char *, size_t) asm("strncpy") memcpyesque; asm("call\tMemSet" \ : "=m"(*(char(*)[SiZe])(DeSt)) \ : "D"(DeSt), "S"(BYTE), "d"(SiZe) \ - : __STR_XMM_CLOBBER "cc"); \ + : __STR_XMM_CLOBBER "rcx", "cc"); \ DeSt; \ }) @@ -506,7 +506,7 @@ char *_strncpy(char *, const char *, size_t) asm("strncpy") memcpyesque; size_t Rcx; \ asm("rep stosb" \ : "=D"(Rdi), "=c"(Rcx), "=m"(*(char(*)[SiZe])(Dest)) \ - : "0"(Dest), "1"(SiZe), "S"(BYTE) \ + : "0"(Dest), "1"(SiZe), "a"(BYTE) \ : "cc"); \ Dest; \ }) diff --git a/libc/str/str.mk b/libc/str/str.mk index 498c44e0..835edfa3 100644 --- a/libc/str/str.mk +++ b/libc/str/str.mk @@ -42,6 +42,10 @@ $(LIBC_STR_A).pkg: \ $(LIBC_STR_A_OBJS) \ $(foreach x,$(LIBC_STR_A_DIRECTDEPS),$($(x)_A).pkg) +o/$(MODE)/libc/str/lz4cpy.o: \ + OVERRIDE_CFLAGS += \ + $(NO_MAGIC) + LIBC_STR_LIBS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x))) LIBC_STR_SRCS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)_SRCS)) LIBC_STR_HDRS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)_HDRS)) diff --git a/libc/stubs/asan.S b/libc/stubs/asan.S new file mode 100644 index 00000000..390bdcc6 --- /dev/null +++ b/libc/stubs/asan.S @@ -0,0 +1,279 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ 
+╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" +.source __FILE__ + +/ @fileoverview Address Sanitizer Linker Poison + +__asan_addr_is_in_fake_stack: + ud2 + .endfn __asan_addr_is_in_fake_stack,weak + +__asan_after_dynamic_init: + ud2 + .endfn __asan_after_dynamic_init,weak + +__asan_alloca_poison: + ud2 + .endfn __asan_alloca_poison,weak + +__asan_allocas_unpoison: + ud2 + .endfn __asan_allocas_unpoison,weak + +__asan_before_dynamic_init: + ud2 + .endfn __asan_before_dynamic_init,weak + +__asan_get_current_fake_stack: + ud2 + .endfn __asan_get_current_fake_stack,weak + +__asan_handle_no_return: + ud2 + .endfn __asan_handle_no_return,weak + +__asan_init: + ud2 + .endfn __asan_init,weak + +__asan_load1: + ud2 + .endfn __asan_load1,weak + +__asan_load2: + ud2 + .endfn __asan_load2,weak + +__asan_load4: + ud2 + .endfn __asan_load4,weak + +__asan_load8: + ud2 + .endfn __asan_load8,weak + +__asan_load16: + ud2 + .endfn __asan_load16,weak + +__asan_load32: + ud2 + .endfn __asan_load32,weak + +__asan_noreentry: + ud2 + .endfn __asan_noreentry,weak + +__asan_option_detect_stack_use_after_return: 
+ ud2 + .endfn __asan_option_detect_stack_use_after_return,weak + +__asan_poison_stack_memory: + ud2 + .endfn __asan_poison_stack_memory,weak + +__asan_register_globals: + ud2 + .endfn __asan_register_globals,weak + +__asan_report_load1: + ud2 + .endfn __asan_report_load1,weak + +__asan_report_load2: + ud2 + .endfn __asan_report_load2,weak + +__asan_report_load4: + ud2 + .endfn __asan_report_load4,weak + +__asan_report_load8: + ud2 + .endfn __asan_report_load8,weak + +__asan_report_load16: + ud2 + .endfn __asan_report_load16,weak + +__asan_report_load_n: + ud2 + .endfn __asan_report_load_n,weak + +__asan_report_store1: + ud2 + .endfn __asan_report_store1,weak + +__asan_report_store2: + ud2 + .endfn __asan_report_store2,weak + +__asan_report_store4: + ud2 + .endfn __asan_report_store4,weak + +__asan_report_store8: + ud2 + .endfn __asan_report_store8,weak + +__asan_report_store16: + ud2 + .endfn __asan_report_store16,weak + +__asan_report_store32: + ud2 + .endfn __asan_report_store32,weak + +__asan_report_store_n: + ud2 + .endfn __asan_report_store_n,weak + +__asan_stack_free: + ud2 + .endfn __asan_stack_free,weak + +__asan_stack_free_0: + ud2 + .endfn __asan_stack_free_0,weak + +__asan_stack_free_1: + ud2 + .endfn __asan_stack_free_1,weak + +__asan_stack_free_10: + ud2 + .endfn __asan_stack_free_10,weak + +__asan_stack_free_2: + ud2 + .endfn __asan_stack_free_2,weak + +__asan_stack_free_3: + ud2 + .endfn __asan_stack_free_3,weak + +__asan_stack_free_4: + ud2 + .endfn __asan_stack_free_4,weak + +__asan_stack_free_5: + ud2 + .endfn __asan_stack_free_5,weak + +__asan_stack_free_6: + ud2 + .endfn __asan_stack_free_6,weak + +__asan_stack_free_7: + ud2 + .endfn __asan_stack_free_7,weak + +__asan_stack_free_8: + ud2 + .endfn __asan_stack_free_8,weak + +__asan_stack_free_9: + ud2 + .endfn __asan_stack_free_9,weak + +__asan_stack_malloc: + ud2 + .endfn __asan_stack_malloc,weak + +__asan_stack_malloc_0: + ud2 + .endfn __asan_stack_malloc_0,weak + +__asan_stack_malloc_1: + ud2 
+ .endfn __asan_stack_malloc_1,weak + +__asan_stack_malloc_2: + ud2 + .endfn __asan_stack_malloc_2,weak + +__asan_stack_malloc_3: + ud2 + .endfn __asan_stack_malloc_3,weak + +__asan_stack_malloc_4: + ud2 + .endfn __asan_stack_malloc_4,weak + +__asan_stack_malloc_5: + ud2 + .endfn __asan_stack_malloc_5,weak + +__asan_stack_malloc_6: + ud2 + .endfn __asan_stack_malloc_6,weak + +__asan_stack_malloc_7: + ud2 + .endfn __asan_stack_malloc_7,weak + +__asan_stack_malloc_8: + ud2 + .endfn __asan_stack_malloc_8,weak + +__asan_stack_malloc_9: + ud2 + .endfn __asan_stack_malloc_9,weak + +__asan_stack_malloc_10: + ud2 + .endfn __asan_stack_malloc_10,weak + +__asan_store1: + ud2 + .endfn __asan_store1,weak + +__asan_store2: + ud2 + .endfn __asan_store2,weak + +__asan_store4: + ud2 + .endfn __asan_store4,weak + +__asan_store8: + ud2 + .endfn __asan_store8,weak + +__asan_store16: + ud2 + .endfn __asan_store16,weak + +__asan_store32: + ud2 + .endfn __asan_store32,weak + +__asan_unpoison_stack_memory: + ud2 + .endfn __asan_unpoison_stack_memory,weak + +__asan_unregister_globals: + ud2 + .endfn __asan_unregister_globals,weak + +__asan_version_mismatch_check_v8: + ud2 + .endfn __asan_version_mismatch_check_v8,weak diff --git a/libc/stubs/asan.greg.S b/libc/stubs/asan.greg.S deleted file mode 100644 index fd8a93b4..00000000 --- a/libc/stubs/asan.greg.S +++ /dev/null @@ -1,54 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. 
│ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. │ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.h" -.source __FILE__ - -__asan_addr_is_in_fake_stack: -__asan_alloca_poison: -__asan_allocas_unpoison: -__asan_get_current_fake_stack: -__asan_handle_no_return: -__asan_init: -__asan_loadN: -__asan_register_globals: -__asan_report_load_n: -__asan_report_store_n: -__asan_stack_free: -__asan_stack_malloc: -__asan_storeN: -__asan_unregister_globals: -__asan_version_mismatch_check_v8: - xor %eax,%eax - ret - .endfn __asan_addr_is_in_fake_stack,globl,weak - .endfn __asan_alloca_poison,globl,weak - .endfn __asan_allocas_unpoison,globl,weak - .endfn __asan_get_current_fake_stack,globl,weak - .endfn __asan_handle_no_return,globl,weak - .endfn __asan_init,globl,weak - .endfn __asan_loadN,globl,weak - .endfn __asan_register_globals,globl,weak - .endfn __asan_report_load_n,globl,weak - .endfn __asan_report_store_n,globl,weak - .endfn __asan_stack_free,globl,weak - .endfn __asan_stack_malloc,globl,weak - .endfn __asan_storeN,globl,weak - .endfn __asan_unregister_globals,globl,weak - .endfn __asan_version_mismatch_check_v8,globl,weak diff --git a/libc/tinymath/delegates/powif.c b/libc/tinymath/delegates/powif.c deleted file mode 100644 index a71a143d..00000000 --- a/libc/tinymath/delegates/powif.c +++ /dev/null @@ -1,24 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ 
-╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. │ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/tinymath.h" - -float powif(float a, int b) { - return tinymath_powl(a, b); -} diff --git a/libc/tinymath/delegates/powil.c b/libc/tinymath/delegates/powil.c deleted file mode 100644 index fa607c71..00000000 --- a/libc/tinymath/delegates/powil.c +++ /dev/null @@ -1,24 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ This program is free software; you can redistribute it and/or modify │ -│ it under the terms of the GNU General Public License as published by │ -│ the Free Software Foundation; version 2 of the License. │ -│ │ -│ This program is distributed in the hope that it will be useful, but │ -│ WITHOUT ANY WARRANTY; without even the implied warranty of │ -│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ -│ General Public License for more details. 
│ -│ │ -│ You should have received a copy of the GNU General Public License │ -│ along with this program; if not, write to the Free Software │ -│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ -│ 02110-1301 USA │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/tinymath.h" - -long double powil(long double a, int b) { - return tinymath_powl(a, b); -} diff --git a/libc/tinymath/fmaxl.S b/libc/tinymath/fmaxl.S index b9c29120..e920fdb1 100644 --- a/libc/tinymath/fmaxl.S +++ b/libc/tinymath/fmaxl.S @@ -20,6 +20,11 @@ #include "libc/macros.h" .source __FILE__ +/ Returns maximum of two long doubles. +/ +/ @param 𝑥 is long double passed on stack in 16-bytes +/ @param 𝑦 is also pushed on stack, in reverse order +/ @return result in %st0 tinymath_fmaxl: push %rbp mov %rsp,%rbp diff --git a/libc/tinymath/fminl.S b/libc/tinymath/fminl.S index 2f13dbec..6b5c761c 100644 --- a/libc/tinymath/fminl.S +++ b/libc/tinymath/fminl.S @@ -20,6 +20,11 @@ #include "libc/macros.h" .source __FILE__ +/ Returns minimum of two long doubles. +/ +/ @param 𝑥 is long double passed on stack in 16-bytes +/ @param 𝑦 is also pushed on stack, in reverse order +/ @return result in %st0 tinymath_fminl: push %rbp mov %rsp,%rbp diff --git a/libc/tinymath/pow.S b/libc/tinymath/pow.S index e7a73cb3..be3bec46 100644 --- a/libc/tinymath/pow.S +++ b/libc/tinymath/pow.S @@ -22,8 +22,8 @@ / Returns 𝑥^𝑦. 
/ -/ @param 𝑦 is double scalar in low half of %xmm0 -/ @param 𝑥 is double scalar in low half of %xmm1 +/ @param 𝑥 is double scalar in low half of %xmm0 +/ @param 𝑦 is double scalar in low half of %xmm1 / @return double scalar in low half of %xmm0 tinymath_pow: ezlea tinymath_powl,ax diff --git a/libc/tinymath/powi.S b/libc/tinymath/powi.S new file mode 100644 index 00000000..d9b86eeb --- /dev/null +++ b/libc/tinymath/powi.S @@ -0,0 +1,31 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" + +/ Returns 𝑥^𝑦. 
+/ +/ @param 𝑥 is double scalar in low half of %xmm0 +/ @param 𝑦 is int passed in %edi +/ @return double scalar in low half of %xmm0 +tinymath_powi: + cvtsi2sd %edi,%xmm1 + jmp tinymath_pow + .endfn tinymath_powi,globl + .alias tinymath_powi,powi diff --git a/libc/tinymath/powif.S b/libc/tinymath/powif.S new file mode 100644 index 00000000..79dce889 --- /dev/null +++ b/libc/tinymath/powif.S @@ -0,0 +1,31 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" + +/ Returns 𝑥^𝑦. 
+/ +/ @param 𝑥 is float scalar in low quarter of %xmm0 +/ @param 𝑦 is int passed in %edi +/ @return double scalar in low half of %xmm0 +tinymath_powif: + cvtsi2ss %edi,%xmm1 + jmp tinymath_pow + .endfn tinymath_powif,globl + .alias tinymath_powif,powif diff --git a/libc/tinymath/powil.S b/libc/tinymath/powil.S new file mode 100644 index 00000000..98418c9f --- /dev/null +++ b/libc/tinymath/powil.S @@ -0,0 +1,41 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.h" + +/ Returns 𝑥^𝑦. 
+/ +/ @param 𝑥 is long double passed on stack +/ @param 𝑦 is int passed in %edi +/ @return %st +tinymath_powil: + push %rbp + mov %rsp,%rbp + .profilable + sub $32,%rsp + mov %edi,-4(%rbp) + fildl -4(%rbp) + fstpt (%rsp) + push 16+8(%rbp) + push 16+0(%rbp) + call tinymath_powl + leave + ret + .endfn tinymath_powil,globl + .alias tinymath_powil,powil diff --git a/libc/tinymath/rint.S b/libc/tinymath/rint.S index 1b83367d..599c447e 100644 --- a/libc/tinymath/rint.S +++ b/libc/tinymath/rint.S @@ -22,6 +22,11 @@ #include "libc/macros.h" .source __FILE__ +/ Rounds to nearest integer. +/ +/ @param is double passed in %xmm0 +/ @return double in %xmm0 +/ @note rounding behavior can be changed in mxcsr tinymath_rint: #if !X86_NEED(SSE4_2) testb X86_HAVE(SSE4_2)+kCpuids(%rip) diff --git a/libc/tinymath/rintl.S b/libc/tinymath/rintl.S index 22cd5880..fcc20971 100644 --- a/libc/tinymath/rintl.S +++ b/libc/tinymath/rintl.S @@ -20,6 +20,11 @@ #include "libc/macros.h" .source __FILE__ +/ Rounds to nearest integer. 
+/ +/ @param is long double passed on stack +/ @return long double in %st +/ @note rounding behavior can be changed in control word tinymath_rintl: push %rbp mov %rsp,%rbp diff --git a/libc/tinymath/tinymath.mk b/libc/tinymath/tinymath.mk index a3c8d594..12e9b7ae 100644 --- a/libc/tinymath/tinymath.mk +++ b/libc/tinymath/tinymath.mk @@ -6,6 +6,7 @@ PKGS += LIBC_TINYMATH LIBC_TINYMATH_ARTIFACTS += LIBC_TINYMATH_A LIBC_TINYMATH = $(LIBC_TINYMATH_A_DEPS) $(LIBC_TINYMATH_A) LIBC_TINYMATH_A = o/$(MODE)/libc/tinymath/tinymath.a +LIBC_TINYMATH_A_FILES := $(wildcard libc/tinymath/*) LIBC_TINYMATH_A_HDRS = $(filter %.h,$(LIBC_TINYMATH_A_FILES)) LIBC_TINYMATH_A_SRCS_A = $(filter %.s,$(LIBC_TINYMATH_A_FILES)) LIBC_TINYMATH_A_SRCS_S = $(filter %.S,$(LIBC_TINYMATH_A_FILES)) @@ -13,10 +14,6 @@ LIBC_TINYMATH_A_SRCS_C = $(filter %.c,$(LIBC_TINYMATH_A_FILES)) LIBC_TINYMATH_A_SRCS = $(LIBC_TINYMATH_A_SRCS_S) $(LIBC_TINYMATH_A_SRCS_C) LIBC_TINYMATH_A_CHECKS = $(LIBC_TINYMATH_A).pkg -LIBC_TINYMATH_A_FILES := \ - $(wildcard libc/tinymath/*) \ - $(wildcard libc/tinymath/delegates/*) - LIBC_TINYMATH_A_OBJS = \ $(LIBC_TINYMATH_A_SRCS:%=o/$(MODE)/%.zip.o) \ $(LIBC_TINYMATH_A_SRCS_A:%.s=o/$(MODE)/%.o) \ diff --git a/net/http/http.mk b/net/http/http.mk index a8b219f0..35051538 100644 --- a/net/http/http.mk +++ b/net/http/http.mk @@ -12,45 +12,45 @@ NET_HTTP_A_SRCS_S = $(filter %.S,$(NET_HTTP_A_FILES)) NET_HTTP_A_SRCS_C = $(filter %.c,$(NET_HTTP_A_FILES)) NET_HTTP_A_SRCS_R = $(filter %.rl,$(NET_HTTP_A_FILES)) -NET_HTTP_A_SRCS = \ - $(NET_HTTP_A_SRCS_S) \ - $(NET_HTTP_A_SRCS_C) \ +NET_HTTP_A_SRCS = \ + $(NET_HTTP_A_SRCS_S) \ + $(NET_HTTP_A_SRCS_C) \ $(NET_HTTP_A_SRCS_R) -NET_HTTP_A_OBJS = \ - $(NET_HTTP_A_SRCS:%=o/$(MODE)/%.zip.o) \ - $(NET_HTTP_A_SRCS_S:%.S=o/$(MODE)/%.o) \ - $(NET_HTTP_A_SRCS_C:%.c=o/$(MODE)/%.o) \ +NET_HTTP_A_OBJS = \ + $(NET_HTTP_A_SRCS:%=o/$(MODE)/%.zip.o) \ + $(NET_HTTP_A_SRCS_S:%.S=o/$(MODE)/%.o) \ + $(NET_HTTP_A_SRCS_C:%.c=o/$(MODE)/%.o) \ 
$(NET_HTTP_A_SRCS_R:%.rl=o/$(MODE)/%.o) -NET_HTTP_A_CHECKS = \ - $(NET_HTTP_A).pkg \ +NET_HTTP_A_CHECKS = \ + $(NET_HTTP_A).pkg \ $(NET_HTTP_A_HDRS:%=o/$(MODE)/%.ok) -NET_HTTP_A_DIRECTDEPS = \ - LIBC_ALG \ - LIBC_CALLS \ - LIBC_CONV \ - LIBC_FMT \ - LIBC_LOG \ - LIBC_NEXGEN32E \ - LIBC_RUNTIME \ - LIBC_SOCK \ - LIBC_STDIO \ - LIBC_STUBS \ - LIBC_SYSV \ - LIBC_TIME \ +NET_HTTP_A_DIRECTDEPS = \ + LIBC_ALG \ + LIBC_CALLS \ + LIBC_CONV \ + LIBC_FMT \ + LIBC_LOG \ + LIBC_NEXGEN32E \ + LIBC_RUNTIME \ + LIBC_SOCK \ + LIBC_STDIO \ + LIBC_STUBS \ + LIBC_SYSV \ + LIBC_TIME \ LIBC_X -NET_HTTP_A_DEPS := \ +NET_HTTP_A_DEPS := \ $(call uniq,$(foreach x,$(NET_HTTP_A_DIRECTDEPS),$($(x)))) -$(NET_HTTP_A): net/http/ \ - $(NET_HTTP_A).pkg \ +$(NET_HTTP_A): net/http/ \ + $(NET_HTTP_A).pkg \ $(NET_HTTP_A_OBJS) -$(NET_HTTP_A).pkg: \ - $(NET_HTTP_A_OBJS) \ +$(NET_HTTP_A).pkg: \ + $(NET_HTTP_A_OBJS) \ $(foreach x,$(NET_HTTP_A_DIRECTDEPS),$($(x)_A).pkg) NET_HTTP_LIBS = $(foreach x,$(NET_HTTP_ARTIFACTS),$($(x))) @@ -61,8 +61,16 @@ NET_HTTP_OBJS = $(foreach x,$(NET_HTTP_ARTIFACTS),$($(x)_OBJS)) $(NET_HTTP_OBJS): $(BUILD_FILES) net/http/http.mk -.PRECIOUS: $(NET_HTTP_A_SRCS_R:%.rl=build/bootstrap/%.c) +.PRECIOUS: \ + $(NET_HTTP_A_SRCS_R:%.rl=build/bootstrap/%.c) \ + o/$(MODE)/net/http/uricspn.s \ + o/$(MODE)/net/http/uriparse.s \ + o/$(MODE)/net/http/uricspn.i \ + o/$(MODE)/net/http/uriparse.i \ + o/$(MODE)/net/http/uriparse.c \ + o/$(MODE)/net/http/uricspn.c + .PHONY: o/$(MODE)/net/http -o/$(MODE)/net/http: \ - $(NET_HTTP_CHECKS) \ +o/$(MODE)/net/http: \ + $(NET_HTTP_CHECKS) \ $(NET_HTTP_A_SRCS_R:%.rl=%.svgz) diff --git a/test/dsp/scale/scale_test.c b/test/dsp/scale/scale_test.c index fafedba5..97806f11 100644 --- a/test/dsp/scale/scale_test.c +++ b/test/dsp/scale/scale_test.c @@ -79,8 +79,8 @@ prpppppppppppppoooooooonnnnnnnnnnnnnnnooooooooppppppppppppptp\ pppppppppppppppppppoooooooooooooooooooooooppppppppppppppppppp"; TEST(gyarados, testIdentityDifference) { - unsigned char A[1][32][62]; - 
unsigned char B[1][32][62]; + static unsigned char A[1][32][62]; + static unsigned char B[1][32][62]; memcpy(A, kDieWelle, sizeof(A)); EzGyarados(1, 32, 61, B, 1, 32, 61, A, 0, 1, 32, 61, 32, 61, 1, 1, 0, 0); AbsoluteDifference(32, 62, B[0], 32, 62, B[0], 32, 62, A[0]); diff --git a/test/dsp/tty/test.mk b/test/dsp/tty/test.mk index 1717b9be..ca2f58bd 100644 --- a/test/dsp/tty/test.mk +++ b/test/dsp/tty/test.mk @@ -24,6 +24,7 @@ TEST_DSP_TTY_DIRECTDEPS = \ DSP_TTY \ LIBC_TINYMATH \ LIBC_LOG \ + LIBC_MEM \ LIBC_RUNTIME \ LIBC_RAND \ LIBC_NEXGEN32E \ diff --git a/test/libc/calls/hefty/commandv_test.c b/test/libc/calls/commandv_test.c similarity index 90% rename from test/libc/calls/hefty/commandv_test.c rename to test/libc/calls/commandv_test.c index af184953..ed595fd9 100644 --- a/test/libc/calls/hefty/commandv_test.c +++ b/test/libc/calls/commandv_test.c @@ -28,6 +28,7 @@ #include "libc/x/x.h" uint64_t i; +char pathbuf[PATH_MAX]; const char *oldpath, *bindir, *homedir, *binsh, *sh; TEST(commandv_00, todo) { /* TODO(jart): Improve this on Windows. 
*/ @@ -51,20 +52,10 @@ TEST(commandv_010, testSlashes_wontSearchPath_butChecksAccess) { sh = defer(unlink, gc(xasprintf("%s/sh.com", homedir))); EXPECT_NE(-1, touch(sh, 0755)); i = g_syscount; - EXPECT_STREQ(sh, commandv(sh)); + EXPECT_STREQ(sh, commandv(sh, pathbuf)); if (!IsWindows()) EXPECT_EQ(i + 1 /* access() */, g_syscount); } -TEST(commandv_010, testNoSlashes_searchesPath_withMemoization) { - if (IsTiny()) return; - i = g_syscount; - EXPECT_STREQ(binsh, commandv("sh.com")); - if (!IsWindows()) EXPECT_GT(g_syscount, i); - i = g_syscount; - EXPECT_STREQ(binsh, commandv("sh.com")); - if (!IsWindows()) EXPECT_EQ(g_syscount, i); -} - TEST(commandv_999, teardown) { setenv("PATH", oldpath, true); unlink(binsh); diff --git a/test/libc/crypto/rijndael_test.c b/test/libc/crypto/rijndael_test.c index 45a47a6f..71730c89 100644 --- a/test/libc/crypto/rijndael_test.c +++ b/test/libc/crypto/rijndael_test.c @@ -19,7 +19,10 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/crypto/rijndael.h" #include "libc/dce.h" +#include "libc/fmt/bing.h" +#include "libc/runtime/internal.h" #include "libc/str/str.h" +#include "libc/sysv/consts/prot.h" #include "libc/testlib/testlib.h" /** diff --git a/test/libc/crypto/test.mk b/test/libc/crypto/test.mk index 5772e2ec..8511bbe1 100644 --- a/test/libc/crypto/test.mk +++ b/test/libc/crypto/test.mk @@ -23,6 +23,8 @@ TEST_LIBC_CRYPTO_CHECKS = \ TEST_LIBC_CRYPTO_DIRECTDEPS = \ LIBC_CRYPTO \ + LIBC_RUNTIME \ + LIBC_FMT \ LIBC_NEXGEN32E \ LIBC_STUBS \ LIBC_TESTLIB diff --git a/test/libc/mem/test.mk b/test/libc/mem/test.mk index 342c1c22..1b1e64e0 100644 --- a/test/libc/mem/test.mk +++ b/test/libc/mem/test.mk @@ -51,8 +51,8 @@ o/$(MODE)/test/libc/mem/%.com.dbg: \ @$(APELINK) $(TEST_LIBC_MEM_OBJS): \ - DEFAULT_CCFLAGS += \ - -fno-builtin + DEFAULT_CCFLAGS += \ + -fno-builtin .PHONY: o/$(MODE)/test/libc/mem o/$(MODE)/test/libc/mem: \ diff --git a/test/libc/runtime/grow_test.c 
b/test/libc/runtime/grow_test.c index bfba721f..f738c029 100644 --- a/test/libc/runtime/grow_test.c +++ b/test/libc/runtime/grow_test.c @@ -47,16 +47,17 @@ TEST(grow, testStackMemory_convertsToDynamic) { int A[] = {1, 2, 3}; int *p = A; size_t capacity = ARRAYLEN(A); - EXPECT_FALSE(isheap(p)); - EXPECT_TRUE(grow(&p, &capacity, sizeof(int), 0)); - EXPECT_TRUE(isheap(p)); - EXPECT_GT(capacity, ARRAYLEN(A)); - EXPECT_EQ(1, p[0]); - EXPECT_EQ(2, p[1]); - EXPECT_EQ(3, p[2]); - p[0] = 7; - EXPECT_EQ(1, A[0]); - free(p); + if (!isheap(p)) { + EXPECT_TRUE(grow(&p, &capacity, sizeof(int), 0)); + EXPECT_TRUE(isheap(p)); + EXPECT_GT(capacity, ARRAYLEN(A)); + EXPECT_EQ(1, p[0]); + EXPECT_EQ(2, p[1]); + EXPECT_EQ(3, p[2]); + p[0] = 7; + EXPECT_EQ(1, A[0]); + free(p); + } } TEST(grow, testGrowth_clearsNewMemory) { @@ -86,12 +87,13 @@ TEST(grow, testOverflow_returnsFalseAndDoesNotFree) { int A[] = {1, 2, 3}; int *p = A; size_t capacity = ARRAYLEN(A); - EXPECT_FALSE(isheap(p)); - EXPECT_FALSE(grow(&p, &capacity, pushpop(SIZE_MAX), 0)); - EXPECT_FALSE(isheap(p)); - EXPECT_EQ(capacity, ARRAYLEN(A)); - EXPECT_EQ(1, p[0]); - EXPECT_EQ(2, p[1]); - EXPECT_EQ(3, p[2]); - free_s(&p); + if (!isheap(p)) { + EXPECT_FALSE(grow(&p, &capacity, pushpop(SIZE_MAX), 0)); + EXPECT_FALSE(isheap(p)); + EXPECT_EQ(capacity, ARRAYLEN(A)); + EXPECT_EQ(1, p[0]); + EXPECT_EQ(2, p[1]); + EXPECT_EQ(3, p[2]); + free_s(&p); + } } diff --git a/test/libc/runtime/mmap_test.c b/test/libc/runtime/mmap_test.c index 58fb4dbb..8680bb67 100644 --- a/test/libc/runtime/mmap_test.c +++ b/test/libc/runtime/mmap_test.c @@ -72,32 +72,31 @@ TEST(mmap, testMapFile_fdGetsClosed_makesNoDifference) { TEST(mmap, testMapFixed_destroysEverythingInItsPath) { unsigned m1 = _mmi.i; - EXPECT_NE(MAP_FAILED, mmap((void *)(kFixedMappingsStart + FRAMESIZE * 0), + EXPECT_NE(MAP_FAILED, mmap((void *)(kFixedmapStart + FRAMESIZE * 0), FRAMESIZE, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); - EXPECT_NE(MAP_FAILED, 
mmap((void *)(kFixedMappingsStart + FRAMESIZE * 1), + EXPECT_NE(MAP_FAILED, mmap((void *)(kFixedmapStart + FRAMESIZE * 1), FRAMESIZE, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); - EXPECT_NE(MAP_FAILED, mmap((void *)(kFixedMappingsStart + FRAMESIZE * 2), + EXPECT_NE(MAP_FAILED, mmap((void *)(kFixedmapStart + FRAMESIZE * 2), FRAMESIZE, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); - EXPECT_NE(MAP_FAILED, mmap((void *)(kFixedMappingsStart + FRAMESIZE * 0), + EXPECT_NE(MAP_FAILED, mmap((void *)(kFixedmapStart + FRAMESIZE * 0), FRAMESIZE * 3, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); ASSERT_GT(_mmi.i, m1); - EXPECT_NE(-1, munmap((void *)kFixedMappingsStart, FRAMESIZE * 3)); + EXPECT_NE(-1, munmap((void *)kFixedmapStart, FRAMESIZE * 3)); +#ifdef __SANITIZE_ADDRESS__ + ASSERT_EQ(m1 + 1, _mmi.i); +#else ASSERT_EQ(m1, _mmi.i); +#endif } TEST(isheap, nullPtr) { ASSERT_FALSE(isheap(NULL)); } -TEST(isheap, stackMemory) { - int boop; - ASSERT_FALSE(isheap(&boop)); -} - TEST(isheap, malloc) { ASSERT_TRUE(isheap(gc(malloc(1)))); } diff --git a/test/libc/test.mk b/test/libc/test.mk index 32889e72..1361bae6 100644 --- a/test/libc/test.mk +++ b/test/libc/test.mk @@ -7,6 +7,7 @@ o/$(MODE)/test/libc: \ o/$(MODE)/test/libc/bits \ o/$(MODE)/test/libc/calls \ o/$(MODE)/test/libc/conv \ + o/$(MODE)/test/libc/crypto \ o/$(MODE)/test/libc/dns \ o/$(MODE)/test/libc/fmt \ o/$(MODE)/test/libc/intrin \ diff --git a/test/tool/build/lib/machine_test.c b/test/tool/build/lib/machine_test.c index f21be6a9..89ec4946 100644 --- a/test/tool/build/lib/machine_test.c +++ b/test/tool/build/lib/machine_test.c @@ -294,5 +294,5 @@ BENCH(machine, benchNop) { } TEST(machine, sizeIsReasonable) { - ASSERT_LE(sizeof(struct Machine), 65536); + ASSERT_LE(sizeof(struct Machine), 65536 * 2); } diff --git a/third_party/dlmalloc/dlmalloc.c b/third_party/dlmalloc/dlmalloc.c index 1482725d..cb75f6e2 100644 --- 
a/third_party/dlmalloc/dlmalloc.c +++ b/third_party/dlmalloc/dlmalloc.c @@ -3,7 +3,6 @@ #include "libc/calls/internal.h" #include "libc/calls/struct/sysinfo.h" #include "libc/conv/conv.h" -#include "libc/conv/sizemultiply.h" #include "libc/dce.h" #include "libc/limits.h" #include "libc/macros.h" @@ -861,6 +860,22 @@ void dlfree(void *mem) { #endif /* FOOTERS */ } +/** + * Multiplies sizes w/ saturation and overflow detection. + * + * @param count may be 0 to for realloc() → free() behavior + * @param opt_out set to count*itemsize or SIZE_MAX on overflow + * @return true on success or false on overflow + */ +static bool sizemultiply(size_t *opt_out, size_t count, size_t itemsize) { + size_t result; + bool overflowed; + overflowed = __builtin_mul_overflow(count, itemsize, &result); + if (overflowed) result = SIZE_MAX; + if (opt_out) *opt_out = result; + return !overflowed; +} + void *dlcalloc(size_t n_elements, size_t elem_size) { void *mem; size_t req; diff --git a/third_party/dlmalloc/dlmalloc.mk b/third_party/dlmalloc/dlmalloc.mk index 7a12392a..be00d331 100644 --- a/third_party/dlmalloc/dlmalloc.mk +++ b/third_party/dlmalloc/dlmalloc.mk @@ -25,6 +25,7 @@ THIRD_PARTY_DLMALLOC_A_CHECKS = \ $(THIRD_PARTY_DLMALLOC_A_HDRS:%=o/$(MODE)/%.ok) THIRD_PARTY_DLMALLOC_A_DIRECTDEPS = \ + LIBC_BITS \ LIBC_CALLS \ LIBC_CONV \ LIBC_FMT \ @@ -47,6 +48,10 @@ $(THIRD_PARTY_DLMALLOC_A).pkg: \ $(THIRD_PARTY_DLMALLOC_A_OBJS) \ $(foreach x,$(THIRD_PARTY_DLMALLOC_A_DIRECTDEPS),$($(x)_A).pkg) +$(THIRD_PARTY_DLMALLOC_A_OBJS): \ + OVERRIDE_CFLAGS += \ + $(NO_MAGIC) + THIRD_PARTY_DLMALLOC_LIBS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x))) THIRD_PARTY_DLMALLOC_SRCS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)_SRCS)) THIRD_PARTY_DLMALLOC_HDRS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)_HDRS)) diff --git a/third_party/dlmalloc/dlmemalign-impl.c b/third_party/dlmalloc/dlmemalign-impl.c index 7d2f0819..a29b85cd 100644 --- a/third_party/dlmalloc/dlmemalign-impl.c 
+++ b/third_party/dlmalloc/dlmemalign-impl.c @@ -1,15 +1,15 @@ +#include "libc/bits/bits.h" #include "libc/mem/mem.h" #include "libc/sysv/errfuns.h" #include "third_party/dlmalloc/dlmalloc.h" void* dlmemalign$impl(mstate m, size_t alignment, size_t bytes) { void* mem = 0; - if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ - alignment = MIN_CHUNK_SIZE; + if (alignment < MIN_CHUNK_SIZE) { /* must be at least a minimum chunk size */ + alignment = MIN_CHUNK_SIZE; /* is 32 bytes on NexGen32e */ + } if ((alignment & (alignment - SIZE_T_ONE)) != 0) { /* Ensure a power of 2 */ - size_t a = MALLOC_ALIGNMENT << 1; - while (a < alignment) a <<= 1; - alignment = a; + alignment = roundup2pow(alignment); } if (bytes >= MAX_REQUEST - alignment) { if (m != 0) { /* Test isn't needed but avoids compiler warning */ diff --git a/third_party/dlmalloc/dlmemalign.c b/third_party/dlmalloc/dlmemalign.c index 7687f2a8..07bfbefb 100644 --- a/third_party/dlmalloc/dlmemalign.c +++ b/third_party/dlmalloc/dlmemalign.c @@ -1,9 +1,7 @@ -#include "third_party/dlmalloc/dlmalloc.h" #include "libc/mem/mem.h" +#include "third_party/dlmalloc/dlmalloc.h" void *dlmemalign(size_t alignment, size_t bytes) { - if (alignment <= MALLOC_ALIGNMENT) { - return dlmalloc(bytes); - } + if (alignment <= MALLOC_ALIGNMENT) return dlmalloc(bytes); return dlmemalign$impl(gm, alignment, bytes); } diff --git a/third_party/dlmalloc/dlposix_memalign.c b/third_party/dlmalloc/dlposix_memalign.c index d3bb5df1..af0839fa 100644 --- a/third_party/dlmalloc/dlposix_memalign.c +++ b/third_party/dlmalloc/dlposix_memalign.c @@ -1,24 +1,27 @@ #include "libc/errno.h" #include "libc/mem/mem.h" +#include "libc/sysv/errfuns.h" #include "third_party/dlmalloc/dlmalloc.h" int dlposix_memalign(void** pp, size_t alignment, size_t bytes) { - void* mem = 0; - if (alignment == MALLOC_ALIGNMENT) + void* mem; + size_t d, r; + mem = NULL; + if (alignment == MALLOC_ALIGNMENT) { mem = dlmalloc(bytes); - else { - size_t d = 
alignment / sizeof(void*); - size_t r = alignment % sizeof(void*); - if (r != 0 || d == 0 || (d & (d - SIZE_T_ONE)) != 0) - return EINVAL; - else if (bytes <= MAX_REQUEST - alignment) { + } else { + d = alignment / sizeof(void*); + r = alignment % sizeof(void*); + if (r != 0 || d == 0 || (d & (d - SIZE_T_ONE)) != 0) { + return einval(); + } else if (bytes <= MAX_REQUEST - alignment) { if (alignment < MIN_CHUNK_SIZE) alignment = MIN_CHUNK_SIZE; mem = dlmemalign$impl(gm, alignment, bytes); } } - if (mem == 0) - return ENOMEM; - else { + if (mem == 0) { + return enomem(); + } else { *pp = mem; return 0; } diff --git a/tool/build/build.mk b/tool/build/build.mk index c43206d1..9a35561c 100644 --- a/tool/build/build.mk +++ b/tool/build/build.mk @@ -93,6 +93,10 @@ o/$(MODE)/tool/build/emulator.o: \ OVERRIDE_COPTS += \ -fno-sanitize=pointer-overflow +# $(TOOL_BUILD_OBJS): \ +# OVERRIDE_CFLAGS += \ +# -fsanitize=address + .PHONY: o/$(MODE)/tool/build o/$(MODE)/tool/build: \ o/$(MODE)/tool/build/emucrt \ diff --git a/tool/build/emulator.c b/tool/build/emulator.c index 009028b8..fd0266df 100644 --- a/tool/build/emulator.c +++ b/tool/build/emulator.c @@ -456,7 +456,7 @@ static void SetupDraw(void) { a = 1 / 8. * tyn; b = 3 / 8. 
* tyn; - c2y[0] = a; + c2y[0] = a * .7; c2y[1] = a * 2; c2y[2] = a * 2 + b; @@ -1590,9 +1590,23 @@ Restart: LeaveScreen(); } if (printstats) { + int i; + extern long opcount[256 * 4]; fprintf(stderr, "taken: %,ld\n", taken); fprintf(stderr, "ntaken: %,ld\n", ntaken); fprintf(stderr, "ops: %,ld\n", ops); + for (i = 0x51; i < 0x58; ++i) opcount[0x50] += opcount[i]; + for (i = 0x51; i < 0x58; ++i) opcount[i] = 0; + for (i = 0x59; i < 0x60; ++i) opcount[0x58] += opcount[i]; + for (i = 0x59; i < 0x60; ++i) opcount[i] = 0; + for (i = 0x91; i < 0x98; ++i) opcount[0x90] += opcount[i]; + for (i = 0x91; i < 0x98; ++i) opcount[i] = 0; + for (i = 0x71; i < 0x80; ++i) opcount[0x70] += opcount[i]; + for (i = 0x71; i < 0x80; ++i) opcount[i] = 0; + for (i = 0; i < ARRAYLEN(opcount); ++i) { + if (!opcount[i]) continue; + fprintf(stderr, "0x%03x %ld\n", i, opcount[i]); + } } munmap(elf->ehdr, elf->size); DisFree(dis); diff --git a/tool/build/lib/buildlib.mk b/tool/build/lib/buildlib.mk index 30bb45c5..17681816 100644 --- a/tool/build/lib/buildlib.mk +++ b/tool/build/lib/buildlib.mk @@ -63,7 +63,9 @@ $(TOOL_BUILD_LIB_A).pkg: \ $(TOOL_BUILD_LIB_A_OBJS) \ $(foreach x,$(TOOL_BUILD_LIB_A_DIRECTDEPS),$($(x)_A).pkg) -o/$(MODE)/tool/build/lib/fpu.o: OVERRIDE_CFLAGS += -ffast-math +# $(TOOL_BUILD_LIB_A_OBJS): \ +# OVERRIDE_CFLAGS += \ +# -fsanitize=address TOOL_BUILD_LIB_LIBS = $(foreach x,$(TOOL_BUILD_LIB_ARTIFACTS),$($(x))) TOOL_BUILD_LIB_SRCS = $(foreach x,$(TOOL_BUILD_LIB_ARTIFACTS),$($(x)_SRCS)) diff --git a/tool/build/lib/cvt.c b/tool/build/lib/cvt.c index 679b1c0c..a8a7f796 100644 --- a/tool/build/lib/cvt.c +++ b/tool/build/lib/cvt.c @@ -29,7 +29,7 @@ static double SseRoundDouble(struct Machine *m, double x) { switch (m->sse.rc) { case 0: - return nearbyint(x); + return rint(x); case 1: return floor(x); case 2: @@ -134,7 +134,7 @@ static void OpPpiWpsqCvtps2pi(struct Machine *m, uint32_t rde) { memcpy(f, GetModrmRegisterXmmPointerRead8(m, rde), 8); switch (m->sse.rc) { case 0: - for 
(i = 0; i < 2; ++i) n[i] = nearbyintf(f[i]); + for (i = 0; i < 2; ++i) n[i] = rintf(f[i]); break; case 1: for (i = 0; i < 2; ++i) n[i] = floorf(f[i]); @@ -250,7 +250,7 @@ static void OpVdqWpsCvtps2dq(struct Machine *m, uint32_t rde) { memcpy(f, GetModrmRegisterXmmPointerRead16(m, rde), 16); switch (m->sse.rc) { case 0: - for (i = 0; i < 4; ++i) n[i] = nearbyintf(f[i]); + for (i = 0; i < 4; ++i) n[i] = rintf(f[i]); break; case 1: for (i = 0; i < 4; ++i) n[i] = floorf(f[i]); diff --git a/tool/build/lib/dis.c b/tool/build/lib/dis.c index 189de010..9d272483 100644 --- a/tool/build/lib/dis.c +++ b/tool/build/lib/dis.c @@ -173,7 +173,7 @@ void Dis(struct Dis *d, struct Machine *m, int64_t addr) { struct DisOp op; long i, j, n, si, max, toto, symbol; unique = 0; - max = 99999; + max = 999999; DisFreeOps(&d->ops); for (i = 0; i < max; ++i) { xed_decoded_inst_zero_set_mode(d->xedd, XED_MACHINE_MODE_LONG_64); @@ -224,10 +224,12 @@ void Dis(struct Dis *d, struct Machine *m, int64_t addr) { xed_instruction_length_decode(d->xedd, d->raw, n); DCHECK_GT(n, 0); p = DisLineCode((struct DisBuilder){d, d->xedd, addr}, d->buf); + CHECK_LT(p - d->buf, sizeof(d->buf)); n = d->xedd->op.error ? 
1 : d->xedd->length; DCHECK_GT(n, 0); } else { p = DisLineData((struct DisBuilder){d, d->xedd, addr}, d->buf, d->raw, n); + CHECK_LT(p - d->buf, sizeof(d->buf)); } DCHECK_LT(p, d->buf + sizeof(d->buf)); DCHECK_LT(strlen(d->buf), sizeof(d->buf)); diff --git a/tool/build/lib/disinst.c b/tool/build/lib/disinst.c index 0fd8c72e..59f4d461 100644 --- a/tool/build/lib/disinst.c +++ b/tool/build/lib/disinst.c @@ -175,8 +175,8 @@ static char *DisName(struct DisBuilder b, char *bp, const char *name, */ char *DisInst(struct DisBuilder b, char *p, const char *spec) { long i, n; - char sbuf[128]; - char args[4][64]; + char sbuf[256]; + char args[4][128]; char *s, *name, *state; bool hasarg, hasmodrm, hasregister, hasmemory; CHECK_EQ(0, (int)b.xedd->op.error); @@ -190,7 +190,7 @@ char *DisInst(struct DisBuilder b, char *p, const char *spec) { hasarg = true; hasregister |= *s == '%'; hasmemory |= *s == 'O'; - DisArg(b, args[n], s); + CHECK_LT(DisArg(b, args[n], s) - args[n], sizeof(args[n])); } if (g_dis_high) p = DisHigh(p, g_dis_high->keyword); p = DisName(b, p, name, hasarg && !hasregister && hasmemory); diff --git a/tool/build/lib/elfwriter.c b/tool/build/lib/elfwriter.c index 71a98260..9afd3a0a 100644 --- a/tool/build/lib/elfwriter.c +++ b/tool/build/lib/elfwriter.c @@ -164,7 +164,7 @@ struct ElfWriter *elfwriter_open(const char *path, int mode) { CHECK_NE(-1, (elf->fd = open(elf->tmppath, O_CREAT | O_TRUNC | O_RDWR | O_EXCL, mode))); CHECK_NE(-1, ftruncate(elf->fd, (elf->mapsize = FRAMESIZE))); - CHECK_NE(MAP_FAILED, (elf->map = mmap((void *)(intptr_t)kFixedMappingsStart, + CHECK_NE(MAP_FAILED, (elf->map = mmap((void *)(intptr_t)kFixedmapStart, elf->mapsize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, elf->fd, 0))); elf->ehdr = memcpy(elf->map, &kObjHeader, (elf->wrote = sizeof(kObjHeader))); diff --git a/tool/build/lib/fpu.c b/tool/build/lib/fpu.c index 37256a75..afaadfa8 100644 --- a/tool/build/lib/fpu.c +++ b/tool/build/lib/fpu.c @@ -235,7 +235,7 @@ static long 
double FpuDiv(struct Machine *m, long double x, long double y) { static long double FpuRound(struct Machine *m, long double x) { switch (m->fpu.rc) { case 0: - return nearbyintl(x); + return rintl(x); case 1: return floorl(x); case 2: diff --git a/tool/build/lib/machine.c b/tool/build/lib/machine.c index 9cbefd45..c0de68e2 100644 --- a/tool/build/lib/machine.c +++ b/tool/build/lib/machine.c @@ -59,7 +59,7 @@ #define MUTATING true #define READONLY false #define UNCONDITIONAL true -#define BITS (8 << RegLog2(rde)) +#define BITS (8u << RegLog2(rde)) #define SIGN (1ull << (BITS - 1)) #define MASK (SIGN | (SIGN - 1)) #define SHIFTMASK (BITS - 1) @@ -1330,8 +1330,8 @@ static void OpSqrtpsd(struct Machine *m, uint32_t rde) { } static void OpRsqrtps(struct Machine *m, uint32_t rde) { - unsigned i; float_v x; + unsigned i; if (Rep(rde) != 3) { memcpy(&x, GetModrmRegisterXmmPointerRead16(m, rde), 16); for (i = 0; i < 4; ++i) x[i] = 1.f / sqrtf(x[i]); @@ -1344,8 +1344,8 @@ static void OpRsqrtps(struct Machine *m, uint32_t rde) { } static void OpRcpps(struct Machine *m, uint32_t rde) { - int i; float_v x; + unsigned i; if (Rep(rde) != 3) { memcpy(&x, GetModrmRegisterXmmPointerRead16(m, rde), 16); for (i = 0; i < 4; ++i) x[i] = 1.f / x[i]; @@ -1551,7 +1551,7 @@ static float_v OpAddsubps(struct Machine *m, float_v x, float_v y) { } static float_v OpMinps(struct Machine *m, float_v x, float_v y) { - int i; + unsigned i; for (i = 0; i < 4; ++i) { x[i] = MIN(x[i], y[i]); } @@ -1559,7 +1559,7 @@ static float_v OpMinps(struct Machine *m, float_v x, float_v y) { } static double_v OpMinpd(struct Machine *m, double_v x, double_v y) { - int i; + unsigned i; for (i = 0; i < 4; ++i) { x[i] = MIN(x[i], y[i]); } @@ -1567,7 +1567,7 @@ static double_v OpMinpd(struct Machine *m, double_v x, double_v y) { } static float_v OpMaxps(struct Machine *m, float_v x, float_v y) { - int i; + unsigned i; for (i = 0; i < 4; ++i) { x[i] = MAX(x[i], y[i]); } @@ -1575,7 +1575,7 @@ static float_v 
OpMaxps(struct Machine *m, float_v x, float_v y) { } static double_v OpMaxpd(struct Machine *m, double_v x, double_v y) { - int i; + unsigned i; for (i = 0; i < 4; ++i) { x[i] = MAX(x[i], y[i]); } @@ -1721,9 +1721,10 @@ static void OpAlubFlipRo(struct Machine *m, uint32_t rde, int h) { } static void OpAlubi(struct Machine *m, uint32_t rde, int h) { - uint8_t *a; + uint8_t *a, x; a = GetModrmRegisterBytePointerWrite(m, rde); - Write8(a, Alu(0, h, Read8(a), m->xedd->op.uimm0, &m->flags)); + x = Alu(0, h, Read8(a), m->xedd->op.uimm0, &m->flags); + if (h != ALU_CMP) Write8(a, x); } static void OpAlubiRo(struct Machine *m, uint32_t rde, int h) { @@ -1761,10 +1762,10 @@ static void OpAluwFlipRo(struct Machine *m, uint32_t rde, int h) { static void OpAluwi(struct Machine *m, uint32_t rde, int h) { uint8_t *a; + uint64_t x; a = GetModrmRegisterWordPointerWriteOszRexw(m, rde); - WriteRegisterOrMemory( - rde, a, - Alu(RegLog2(rde), h, ReadMemory(rde, a), m->xedd->op.uimm0, &m->flags)); + x = Alu(RegLog2(rde), h, ReadMemory(rde, a), m->xedd->op.uimm0, &m->flags); + if (h != ALU_CMP) WriteRegisterOrMemory(rde, a, x); } static void OpAluwiRo(struct Machine *m, uint32_t rde, int h) { @@ -1823,11 +1824,32 @@ static void OpTestRaxIvds(struct Machine *m, uint32_t rde) { &m->flags); } +long opcount[256 * 4]; + void ExecuteInstruction(struct Machine *m) { uint32_t rde; m->ip += m->xedd->length; rde = m->xedd->op.rde; + opcount[m->xedd->op.map << 8 | m->xedd->op.opcode]++; switch (m->xedd->op.map << 8 | m->xedd->op.opcode) { + CASE(0x089, OpMovEvqpGvqp(m, rde)); + CASE(0x083, OpAluwi(m, rde, ModrmReg(rde))); + CASR(0x070, if (GetCond(m, 0x0)) OpJmp(m)); + CASR(0x071, if (GetCond(m, 0x1)) OpJmp(m)); + CASR(0x072, if (GetCond(m, 0x2)) OpJmp(m)); + CASR(0x073, if (GetCond(m, 0x3)) OpJmp(m)); + CASR(0x074, if (GetCond(m, 0x4)) OpJmp(m)); + CASR(0x075, if (GetCond(m, 0x5)) OpJmp(m)); + CASR(0x076, if (GetCond(m, 0x6)) OpJmp(m)); + CASR(0x077, if (GetCond(m, 0x7)) OpJmp(m)); + CASR(0x078, 
if (GetCond(m, 0x8)) OpJmp(m)); + CASR(0x079, if (GetCond(m, 0x9)) OpJmp(m)); + CASR(0x07A, if (GetCond(m, 0xa)) OpJmp(m)); + CASR(0x07B, if (GetCond(m, 0xb)) OpJmp(m)); + CASR(0x07C, if (GetCond(m, 0xc)) OpJmp(m)); + CASR(0x07D, if (GetCond(m, 0xd)) OpJmp(m)); + CASR(0x07E, if (GetCond(m, 0xe)) OpJmp(m)); + CASR(0x07F, if (GetCond(m, 0xf)) OpJmp(m)); CASR(0x0B0 ... 0x0B7, OpMovZbIb(m, rde)); CASR(0x0B8 ... 0x0BF, OpMovZvqpIvqp(m, rde)); CASR(0x050 ... 0x057, OpPushZvq(m, rde)); @@ -1891,32 +1913,14 @@ void ExecuteInstruction(struct Machine *m) { CASE(0x06D, OpString(m, rde, STRING_INS)); CASE(0x06E, OpString(m, rde, STRING_OUTS)); CASE(0x06F, OpString(m, rde, STRING_OUTS)); - CASR(0x070, if (GetCond(m, 0x0)) OpJmp(m)); - CASR(0x071, if (GetCond(m, 0x1)) OpJmp(m)); - CASR(0x072, if (GetCond(m, 0x2)) OpJmp(m)); - CASR(0x073, if (GetCond(m, 0x3)) OpJmp(m)); - CASR(0x074, if (GetCond(m, 0x4)) OpJmp(m)); - CASR(0x075, if (GetCond(m, 0x5)) OpJmp(m)); - CASR(0x076, if (GetCond(m, 0x6)) OpJmp(m)); - CASR(0x077, if (GetCond(m, 0x7)) OpJmp(m)); - CASR(0x078, if (GetCond(m, 0x8)) OpJmp(m)); - CASR(0x079, if (GetCond(m, 0x9)) OpJmp(m)); - CASR(0x07A, if (GetCond(m, 0xa)) OpJmp(m)); - CASR(0x07B, if (GetCond(m, 0xb)) OpJmp(m)); - CASR(0x07C, if (GetCond(m, 0xc)) OpJmp(m)); - CASR(0x07D, if (GetCond(m, 0xd)) OpJmp(m)); - CASR(0x07E, if (GetCond(m, 0xe)) OpJmp(m)); - CASR(0x07F, if (GetCond(m, 0xf)) OpJmp(m)); CASR(0x080, OpAlubi(m, rde, ModrmReg(rde))); CASE(0x081, OpAluwi(m, rde, ModrmReg(rde))); CASR(0x082, OpAlubi(m, rde, ModrmReg(rde))); - CASE(0x083, OpAluwi(m, rde, ModrmReg(rde))); CASR(0x084, OpAlubRo(m, rde, TEST)); CASE(0x085, OpAluwRo(m, rde, TEST)); CASE(0x086, OpXchgGbEb(m, rde)); CASE(0x087, OpXchgGvqpEvqp(m, rde)); CASE(0x088, OpMovEbGb(m, rde)); - CASE(0x089, OpMovEvqpGvqp(m, rde)); CASE(0x08A, OpMovGbEb(m, rde)); CASE(0x08B, OpMovGvqpEvqp(m, rde)); CASE(0x08C, OpMovEvqpSw(m)); diff --git a/tool/build/lib/machine.h b/tool/build/lib/machine.h index 
55334dae..0f8fee5b 100644 --- a/tool/build/lib/machine.h +++ b/tool/build/lib/machine.h @@ -136,7 +136,7 @@ struct Machine { int64_t faultaddr; uint8_t stash[4096]; uint8_t xmmtype[2][8]; - struct XedDecodedInst icache[512]; + struct XedDecodedInst icache[1024]; struct MachineFds fds; }; diff --git a/tool/build/lib/sse.c b/tool/build/lib/sse.c index 3a7eab2c..039933ac 100644 --- a/tool/build/lib/sse.c +++ b/tool/build/lib/sse.c @@ -117,14 +117,13 @@ union MachineVector { void OpSse(struct Machine *m, uint32_t rde, enum OpSseKernel kernel) { int i; uint8_t *p; - union MachineVector x, y, t; + union MachineVector x, y; p = GetModrmRegisterXmmPointerRead16(m, rde); if (Osz(rde)) { memcpy(&y, p, 16); } else { - memset(&t, 0, 16); - memcpy(&t, p, 8); - memcpy(&y, &t, 16); + memset(&y, 0, 16); + memcpy(&y, p, 8); } memcpy(&x, XmmRexrReg(m, rde), 16); switch (kernel) { diff --git a/tool/build/mkdeps.c b/tool/build/mkdeps.c index 0c08cc62..6754cfba 100644 --- a/tool/build/mkdeps.c +++ b/tool/build/mkdeps.c @@ -219,7 +219,7 @@ void LoadRelationships(int argc, char *argv[]) { } CHECK_NE(-1, fclose(finpaths)); } - free_s(&line); + free(line); } void GetOpts(int argc, char *argv[]) { diff --git a/tool/build/runit.c b/tool/build/runit.c index fd9ca651..d4de766e 100644 --- a/tool/build/runit.c +++ b/tool/build/runit.c @@ -106,9 +106,13 @@ static const struct addrinfo kResolvHints = {.ai_family = AF_INET, .ai_protocol = IPPROTO_TCP}; int g_sock; +char *g_prog; +char *g_runitd; jmp_buf g_jmpbuf; -uint16_t g_sshport, g_runitdport; -char *g_prog, *g_runitd, *g_ssh, g_hostname[128]; +uint16_t g_sshport; +uint16_t g_runitdport; +char g_ssh[PATH_MAX]; +char g_hostname[128]; forceinline pureconst size_t GreatestTwoDivisor(size_t x) { return x & (~x + 1); @@ -389,7 +393,7 @@ int main(int argc, char *argv[]) { unreachable; } if (argc < 1 + 2) ShowUsage(stderr, EX_USAGE); - CHECK_NOTNULL((g_ssh = commandv(firstnonnull(getenv("SSH"), "ssh")))); + 
CHECK_NOTNULL(commandv(firstnonnull(getenv("SSH"), "ssh"), g_ssh)); CheckExists((g_runitd = argv[1])); CheckExists((g_prog = argv[2])); if (argc == 1 + 2) return 0; /* hosts list empty */ diff --git a/tool/debug/debug.mk b/tool/debug/debug.mk deleted file mode 100644 index a58c3de8..00000000 --- a/tool/debug/debug.mk +++ /dev/null @@ -1,33 +0,0 @@ -#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐ -#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘ - -PKGS += TOOL_DEBUG - -TOOL_DEBUG_SRCS := $(wildcard tool/debug/*.c) -TOOL_DEBUG_OBJS = $(TOOL_DEBUG_SRCS:%.c=o/$(MODE)/%.o) -TOOL_DEBUG_COMS = $(TOOL_DEBUG_OBJS:%.o=%.com) - -TOOL_DEBUG_DEPS := $(call uniq, \ - $(LIBC_STR) \ - $(LIBC_RUNTIME) \ - $(LIBC_STDIO) \ - $(LIBC_X) \ - $(LIBC_LOG)) - -TOOL_DEBUG_BINS = \ - $(TOOL_DEBUG_COMS) \ - $(TOOL_DEBUG_COMS:%=%.dbg) - -o/$(MODE)/tool/debug/%.com.dbg: \ - $(TOOL_DEBUG_DEPS) \ - o/$(MODE)/tool/debug/%.o \ - $(CRT) \ - $(APE) - @$(APELINK) - -$(TOOL_DEBUG_OBJS): \ - $(BUILD_FILES) \ - tool/debug/debug.mk - -.PHONY: o/$(MODE)/tool/debug -o/$(MODE)/tool/debug: $(TOOL_DEBUG_BINS) $(TOOL_DEBUG_CHECKS) diff --git a/tool/emacs/cosmo-asm-mode.el b/tool/emacs/cosmo-asm-mode.el index 6e07c34c..1bd8e46a 100644 --- a/tool/emacs/cosmo-asm-mode.el +++ b/tool/emacs/cosmo-asm-mode.el @@ -169,7 +169,7 @@ ;; * mov $'c,%eax ;; * mov $'\n,%eax ;; - ("[ \t,]\\$\\(\\(?:'\\(?:'\\|\\s\"\\|\\s\\.\\|.\\)\\|\\(?:0x[[:xdigit:]]+\\|0b[01]+\\|[1-9][0-9]*\\|0[0-7]*\\)\\(?:[fb]\\|u?l?l?\\)\\|[-*/&^|()%<>~+]\\|[_.[:alpha:]][-_.[:alnum:]]*\\)+\\)" + ("[ \t,]\\$\\(\\(?:'\\(?:'\\|\\s\"\\|\\s\\.\\|.\\)\\|\\(?:0x[[:xdigit:]]+\\|0b[01]+\\|[1-9][0-9]*\\|0[0-7]*\\)\\(?:[fb]\\|u?l?l?\\)\\|[-*/&^|()%<>~+]\\|[_.$[:alpha:]][-_.[:alnum:]]*\\)+\\)" 1 font-lock-constant-face) (cosmo-asm-doctag-keywords) diff --git a/tool/emacs/cosmo-c-builtins.el b/tool/emacs/cosmo-c-builtins.el index ce3ae5f7..8c0388e5 100644 --- a/tool/emacs/cosmo-c-builtins.el +++ 
b/tool/emacs/cosmo-c-builtins.el @@ -230,6 +230,21 @@ "__builtin_lround" "__builtin_lroundf" "__builtin_lroundl" + "__builtin_nearbyint" + "__builtin_nearbyintf" + "__builtin_nearbyintl" + "__builtin_scalbn" + "__builtin_scalbnf" + "__builtin_scalbnl" + "__builtin_logb" + "__builtin_logbf" + "__builtin_logbl" + "__builtin_fmax" + "__builtin_fmaxf" + "__builtin_fmaxl" + "__builtin_fmin" + "__builtin_fminf" + "__builtin_fminl" "__builtin_rint" "__builtin_rintf" "__builtin_rintl" diff --git a/tool/tool.mk b/tool/tool.mk index cd8cc845..d0786f6a 100644 --- a/tool/tool.mk +++ b/tool/tool.mk @@ -4,7 +4,6 @@ .PHONY: o/$(MODE)/tool o/$(MODE)/tool: \ o/$(MODE)/tool/build \ - o/$(MODE)/tool/debug \ o/$(MODE)/tool/decode \ o/$(MODE)/tool/hash \ o/$(MODE)/tool/net \ diff --git a/tool/viz/basicidea.c b/tool/viz/basicidea.c index 17924834..0b4677e2 100644 --- a/tool/viz/basicidea.c +++ b/tool/viz/basicidea.c @@ -218,11 +218,11 @@ static void ReadAll(int fd, void *buf, size_t n) { static void LoadImageOrDie(const char *path, size_t size, long yn, long xn, unsigned char RGB[yn][xn][4]) { int pid, ws, fds[3]; - const char *convert; + char *convert, pathbuf[PATH_MAX]; if (isempty((convert = getenv("CONVERT"))) && !(IsWindows() && access((convert = "\\msys64\\mingw64\\bin\\convert.exe"), X_OK) != -1) && - !(convert = commandv("convert"))) { + !(convert = commandv("convert", pathbuf))) { fputs("'convert' command not found\r\n" "please install imagemagick\r\n", stderr); diff --git a/tool/viz/derasterize.c b/tool/viz/derasterize.c index 1999e947..e9a4efb6 100644 --- a/tool/viz/derasterize.c +++ b/tool/viz/derasterize.c @@ -488,8 +488,9 @@ static int ReadAll(int fd, void *data, size_t size) { static void LoadFileViaImageMagick(const char *path, unsigned yn, unsigned xn, unsigned char rgb[yn][YS][xn][XS][CN]) { const char *convert; + char pathbuf[PATH_MAX]; int pid, ws, fds[3] = {STDIN_FILENO, -1, STDERR_FILENO}; - if (!(convert = commandv("convert"))) { + if (!(convert = 
commandv("convert", pathbuf))) { fputs("error: `convert` command not found\n" "try: apt-get install imagemagick\n", stderr); diff --git a/tool/viz/lib/formatmatrix-double.c b/tool/viz/lib/formatmatrix-double.c index f146b2e2..ffc58a15 100644 --- a/tool/viz/lib/formatmatrix-double.c +++ b/tool/viz/lib/formatmatrix-double.c @@ -26,9 +26,9 @@ #include "tool/viz/lib/formatstringtable.h" #include "tool/viz/lib/stringbuilder.h" -static void *ConvertMatrixToStringTable(long yn, long xn, char *T[yn][xn], - const double M[yn][xn], double digs, - double rounder(double)) { +void *ConvertMatrixToStringTable(long yn, long xn, char *T[yn][xn], + const double M[yn][xn], double digs, + double rounder(double)) { double f; long y, x; assert(yn && xn && !T[0][0]); diff --git a/tool/viz/printvideo.c b/tool/viz/printvideo.c index 108848fb..121f7440 100644 --- a/tool/viz/printvideo.c +++ b/tool/viz/printvideo.c @@ -21,7 +21,6 @@ #include "dsp/core/half.h" #include "dsp/core/illumination.h" #include "dsp/mpeg/mpeg.h" -#include "dsp/mpeg/ycbcrio.h" #include "dsp/scale/scale.h" #include "dsp/tty/quant.h" #include "dsp/tty/tty.h" @@ -772,21 +771,6 @@ static void RenderIt(void) { EndRender(p); } -static noinline void SaveMpegFrame(plm_frame_t *pf) { - static long count; - struct Ycbcrio *m; - if (!count) { - if (!isdirectory("o/frames")) { - if (!isdirectory("o")) { - CHECK_NE(-1, mkdir("o", 0755)); - } - CHECK_NE(-1, mkdir("o/frames", 0755)); - } - } - m = YcbcrioOpen(gc(xasprintf("o/frames/%04ld.ycbcrio", (count++ % 100))), pf); - YcbcrioClose(&m); -} - static void RasterIt(void) { static bool once; static void *buf; @@ -873,7 +857,6 @@ static void OnVideo(plm_t *mpeg, plm_frame_t *pf, void *user) { if (f2_->n) { WARNF("video frame dropped"); } else { - /* if (pf3_) SaveMpegFrame(pf); */ TranscodeVideo(pf); if (!f1_->n) { xchg(&f1_, &f2_); diff --git a/tool/viz/upscalefloat.c b/tool/viz/upscalefloat.c index ad8e306c..12c40443 100644 --- a/tool/viz/upscalefloat.c +++ 
b/tool/viz/upscalefloat.c @@ -25,6 +25,7 @@ #include "libc/macros.h" #include "libc/math.h" #include "libc/mem/mem.h" +#include "libc/runtime/runtime.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/sysv/consts/madv.h" diff --git a/tool/viz/upscaleint.c b/tool/viz/upscaleint.c index e1fb1e7f..a3b976d8 100644 --- a/tool/viz/upscaleint.c +++ b/tool/viz/upscaleint.c @@ -25,6 +25,7 @@ #include "libc/macros.h" #include "libc/math.h" #include "libc/mem/mem.h" +#include "libc/runtime/runtime.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/sysv/consts/madv.h"