Browse Source

Make minor improvements

main
Justine Tunney 10 months ago
parent
commit
95b142e4e5
  1. 1
      dsp/scale/gyarados.c
  2. 3
      examples/unbourne.c
  3. 13
      libc/bits/bits.h
  4. 36
      libc/bits/bswap.h
  5. 1
      libc/bits/popcnt.c
  6. 2
      libc/bits/popcnt.h
  7. 62
      libc/bits/pushpop.h
  8. 8
      libc/calls/calls.h
  9. 2
      libc/calls/clock_gettime.c
  10. 2
      libc/calls/isdebuggerpresent.c
  11. 10
      libc/calls/sigaction.c
  12. 2
      libc/calls/unlink_s.c
  13. 1
      libc/fmt/conv.h
  14. 4
      libc/fmt/dirname.c
  15. 11
      libc/fmt/leb128.h
  16. 2
      libc/fmt/pflink.h
  17. 39
      libc/fmt/sleb128.c
  18. 2
      libc/fmt/strerror.c
  19. 6
      libc/fmt/unsleb128.c
  20. 52
      libc/integral/c.inc
  21. 5
      libc/intrin/mpsadbw.h
  22. 59
      libc/intrin/palignr.h
  23. 43
      libc/intrin/pslldq.h
  24. 43
      libc/intrin/psrldq.h
  25. 5
      libc/log/log.h
  26. 21
      libc/macros-cpp.internal.inc
  27. 5
      libc/macros.h
  28. 20
      libc/macros.internal.inc
  29. 2
      libc/nexgen32e/crc32.h
  30. 2
      libc/nexgen32e/crc32c-sse42.c
  31. 467
      libc/nexgen32e/memmove.inc
  32. 52
      libc/nexgen32e/strlen.S
  33. 4
      libc/sock/bind.c
  34. 3
      libc/sock/connect-sysv.c
  35. 2
      libc/sock/sendto.c
  36. 24
      libc/stdio/fputc.c
  37. 53
      libc/stdio/fputcfb.c
  38. 2
      libc/stdio/fputs.c
  39. 4
      libc/stdio/fread.c
  40. 4
      libc/stdio/g_stdbuf.c
  41. 2
      libc/stdio/stdio.h
  42. 2
      libc/str/chomp.c
  43. 3
      libc/str/decodentsutf16.c
  44. 4
      libc/str/hextoint.c
  45. 3
      libc/str/isgraph.c
  46. 3
      libc/str/isprint.c
  47. 3
      libc/str/ispunct.c
  48. 2
      libc/str/mbtowc.c
  49. 67
      libc/str/str.h
  50. 3
      libc/str/strlcpy.c
  51. 2
      libc/str/strsignal.c
  52. 10
      libc/str/tpdecodecb.internal.h
  53. 2
      libc/str/tpencode.ncabi.c
  54. 15
      libc/str/utf16.h
  55. 6
      libc/testlib/ugly.h
  56. 1
      libc/x/x.h
  57. 28
      libc/x/xdirname.c
  58. 17
      libc/x/xjoinpaths.c
  59. 10
      test/libc/fmt/basename_test.c
  60. 34
      test/libc/fmt/dirname_test.c
  61. 33
      test/libc/stdio/fputs_test.c
  62. 36
      test/libc/stdio/fread_test.c
  63. 47
      test/libc/str/bsr_test.c
  64. 8
      test/libc/str/strlen_test.c
  65. 31
      test/libc/x/xjoinpaths_test.c
  66. 2
      test/net/http/uricspn_test.c
  67. 1
      third_party/chibicc/README.cosmo
  68. 3844
      third_party/chibicc/as.c
  69. 3
      third_party/chibicc/asm.c
  70. 8
      third_party/chibicc/chibicc.c
  71. 5
      third_party/chibicc/chibicc.h
  72. 13
      third_party/chibicc/codegen.c
  73. 3
      third_party/chibicc/hashmap.c
  74. 34
      third_party/chibicc/hog.s
  75. 15
      third_party/chibicc/parse.c
  76. 2
      third_party/chibicc/preprocess.c
  77. 23
      third_party/chibicc/test/bitfield_test.c
  78. 50
      third_party/chibicc/test/dce_test.c
  79. 802
      third_party/chibicc/test/initializer_test.c
  80. 4
      third_party/chibicc/test/sizeof_test.c
  81. 2
      third_party/chibicc/tokenize.c
  82. 4
      third_party/chibicc/type.c
  83. 183
      third_party/compiler_rt/clear_cache.c
  84. 51
      third_party/compiler_rt/trampoline_setup.c
  85. 4
      third_party/duktape/duk_config.h
  86. 1
      tool/build/build.mk
  87. 18
      tool/build/lib/elfwriter.c
  88. 1
      tool/build/lib/elfwriter.h
  89. 6
      tool/build/lib/interner.c
  90. 2
      tool/build/lib/iovs.c
  91. 2
      tool/build/mkdeps.c
  92. 20
      tool/decode/elf.c
  93. 3
      tool/emacs/c.lang
  94. 5
      tool/emacs/cosmo-c-builtins.el
  95. 2
      tool/emacs/cosmo-stuff.el

1
dsp/scale/gyarados.c

@ -74,6 +74,7 @@ static struct SamplingSolution *NewSamplingSolution(long n, long s) {
ss->indices = xcalloc(n * s, sizeof(short));
return ss;
}
static bool IsNormalized(int n, double A[n]) {
int i;
double x;

3
examples/unbourne.c

@ -144,6 +144,9 @@
#include "third_party/gdtoa/gdtoa.h"
#include "third_party/musl/passwd.h"
#define likely(expr) __builtin_expect(!!(expr), 1)
#define unlikely(expr) __builtin_expect(!!(expr), 0)
#undef CEOF
#undef rflag

13
libc/bits/bits.h

@ -263,11 +263,12 @@ unsigned long hamming(unsigned long, unsigned long) pureconst;
* @return LOCALVAR[0]
* @see xchg()
*/
#define lockxchg(MEMORY, LOCALVAR) \
({ \
static_assert(typescompatible(typeof(*(MEMORY)), typeof(*(LOCALVAR)))); \
asm("xchg\t%0,%1" : "+%m"(*(MEMORY)), "+r"(*(LOCALVAR))); \
*(LOCALVAR); \
#define lockxchg(MEMORY, LOCALVAR) \
({ \
_Static_assert( \
__builtin_types_compatible_p(typeof(*(MEMORY)), typeof(*(LOCALVAR)))); \
asm("xchg\t%0,%1" : "+%m"(*(MEMORY)), "+r"(*(LOCALVAR))); \
*(LOCALVAR); \
})
/**
@ -376,7 +377,7 @@ unsigned long hamming(unsigned long, unsigned long) pureconst;
#define __BitOp(OP, BIT, MEM) \
({ \
bool OldBit; \
if (isconstant(BIT)) { \
if (__builtin_constant_p(BIT)) { \
asm(CFLAG_ASM(OP "%z1\t%2,%1") \
: CFLAG_CONSTRAINT(OldBit), \
"+m"((MEM)[(BIT) / (sizeof((MEM)[0]) * CHAR_BIT)]) \

36
libc/bits/bswap.h

@ -8,39 +8,9 @@ uint32_t bswap_32(uint32_t) pureconst;
uint32_t bswap_64(uint32_t) pureconst;
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define bswap_16(U16) \
(isconstant(U16) ? ((((U16)&0xff00) >> 010) | (((U16)&0x00ff) << 010)) : ({ \
uint16_t Swapped16, Werd16 = (U16); \
asm("xchg\t%b0,%h0" : "=Q"(Swapped16) : "0"(Werd16)); \
Swapped16; \
}))
#define bswap_32(U32) \
(isconstant(U32) \
? ((((U32)&0xff000000) >> 030) | (((U32)&0x000000ff) << 030) | \
(((U32)&0x00ff0000) >> 010) | (((U32)&0x0000ff00) << 010)) \
: ({ \
uint32_t Swapped32, Werd32 = (U32); \
asm("bswap\t%0" : "=r"(Swapped32) : "0"(Werd32)); \
Swapped32; \
}))
#define bswap_64(U64) \
(isconstant(U64) ? ((((U64)&0xff00000000000000ul) >> 070) | \
(((U64)&0x00000000000000fful) << 070) | \
(((U64)&0x00ff000000000000ul) >> 050) | \
(((U64)&0x000000000000ff00ul) << 050) | \
(((U64)&0x0000ff0000000000ul) >> 030) | \
(((U64)&0x0000000000ff0000ul) << 030) | \
(((U64)&0x000000ff00000000ul) >> 010) | \
(((U64)&0x00000000ff000000ul) << 010)) \
: ({ \
uint64_t Swapped64, Werd64 = (U64); \
asm("bswap\t%0" : "=r"(Swapped64) : "0"(Werd64)); \
Swapped64; \
}))
#define bswap_16(x) __builtin_bswap16(x)
#define bswap_32(x) __builtin_bswap32(x)
#define bswap_64(x) __builtin_bswap64(x)
#endif /* defined(__GNUC__) && !defined(__STRICT_ANSI__) */
COSMOPOLITAN_C_END_

1
libc/bits/popcnt.c

@ -20,7 +20,6 @@
#include "libc/bits/popcnt.h"
uint64_t(popcnt)(uint64_t x) {
uint32_t r;
x = x - ((x >> 1) & 0x5555555555555555);
x = ((x >> 2) & 0x3333333333333333) + (x & 0x3333333333333333);
x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;

2
libc/bits/popcnt.h

@ -8,7 +8,7 @@ unsigned long popcnt(unsigned long) pureconst;
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define popcnt(X) \
(isconstant(X) ? __builtin_popcountll(X) : ({ \
(__builtin_constant_p(X) ? __builtin_popcountll(X) : ({ \
unsigned long Res, Pop = (X); \
if (X86_HAVE(POPCNT)) { \
asm("popcnt\t%1,%0" : "=r"(Res) : "r"(Pop) : "cc"); \

62
libc/bits/pushpop.h

@ -10,42 +10,44 @@
#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#define pushpop(x) (x)
#else
#define pushpop(x) \
({ \
typeof(x) Popped; \
if (isconstant(x) && (TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
: (intptr_t)(x) < 128)) { \
if (x) { \
asm("push\t%1\n\t" \
"pop\t%q0" \
: "=r"(Popped) \
: "ir"(x)); \
} else { \
asm("xor\t%k0,%k0" : "=r"(Popped)); \
} \
} else { \
asm("" : "=r"(Popped) : "0"(x)); \
} \
Popped; \
#define pushpop(x) \
({ \
typeof(x) Popped; \
if (__builtin_constant_p(x) && \
(TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
: (intptr_t)(x) < 128)) { \
if (x) { \
asm("push\t%1\n\t" \
"pop\t%q0" \
: "=r"(Popped) \
: "ir"(x)); \
} else { \
asm("xor\t%k0,%k0" : "=r"(Popped)); \
} \
} else { \
asm("" : "=r"(Popped) : "0"(x)); \
} \
Popped; \
})
#endif
#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#define pushmov(d, x) (*(d) = (x))
#else
#define pushmov(d, x) \
({ \
typeof(*(d)) Popped = (x); \
if (isconstant(x) && (TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
: (intptr_t)(x) < 128)) { \
asm("pushq\t%1\n\t" \
"popq\t%0" \
: "=m"(*(d)) \
: "ir"(Popped)); \
} else { \
*(d) = Popped; \
} \
Popped; \
#define pushmov(d, x) \
({ \
typeof(*(d)) Popped = (x); \
if (__builtin_constant_p(x) && \
(TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
: (intptr_t)(x) < 128)) { \
asm("pushq\t%1\n\t" \
"popq\t%0" \
: "=m"(*(d)) \
: "ir"(Popped)); \
} else { \
*(d) = Popped; \
} \
Popped; \
})
#endif

8
libc/calls/calls.h

@ -227,9 +227,9 @@ uint32_t gettid(void) nosideeffect;
uint32_t getuid(void) nosideeffect;
uint32_t umask(int32_t);
#define getcwd(BUF, SIZE) \
(isconstant(BUF) && (&(BUF)[0] == NULL) ? get_current_dir_name() \
: getcwd(BUF, SIZE))
#define getcwd(BUF, SIZE) \
(__builtin_constant_p(BUF) && (&(BUF)[0] == NULL) ? get_current_dir_name() \
: getcwd(BUF, SIZE))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § system calls » formatting
@ -249,7 +249,7 @@ void _init_wincrash(void);
#define __SIGACTION(FN, SIG, ...) \
({ \
if (SupportsWindows()) { \
if (isconstant(SIG)) { \
if (__builtin_constant_p(SIG)) { \
switch (SIG) { \
case SIGINT: \
case SIGQUIT: \

2
libc/calls/clock_gettime.c

@ -63,7 +63,7 @@ int clock_gettime(int clockid, struct timespec *out_ts) {
return clock_gettime$sysv(clockid, out_ts);
} else {
int rc;
static_assert(sizeof(struct timeval) == sizeof(struct timespec));
_Static_assert(sizeof(struct timeval) == sizeof(struct timespec));
if (out_ts) {
out_ts->tv_sec = 0;
out_ts->tv_nsec = 0;

2
libc/calls/isdebuggerpresent.c

@ -33,7 +33,7 @@
#define kBufSize 1024
#define kProcStatus "/proc/self/status"
alignas(16) static const char kGdbPid[] = "TracerPid:\t";
_Alignas(16) static const char kGdbPid[] = "TracerPid:\t";
/**
* Determines if gdb, strace, windbg, etc. is controlling process.

10
libc/calls/sigaction.c

@ -121,11 +121,11 @@ static void sigaction$native2cosmo(union metasigaction *sa) {
* @asyncsignalsafe
*/
int(sigaction)(int sig, const struct sigaction *act, struct sigaction *oldact) {
static_assert(sizeof(struct sigaction) > sizeof(struct sigaction$linux) &&
sizeof(struct sigaction) > sizeof(struct sigaction$xnu_in) &&
sizeof(struct sigaction) > sizeof(struct sigaction$xnu_out) &&
sizeof(struct sigaction) > sizeof(struct sigaction$freebsd) &&
sizeof(struct sigaction) > sizeof(struct sigaction$openbsd));
_Static_assert(sizeof(struct sigaction) > sizeof(struct sigaction$linux) &&
sizeof(struct sigaction) > sizeof(struct sigaction$xnu_in) &&
sizeof(struct sigaction) > sizeof(struct sigaction$xnu_out) &&
sizeof(struct sigaction) > sizeof(struct sigaction$freebsd) &&
sizeof(struct sigaction) > sizeof(struct sigaction$openbsd));
int rc, rva, oldrva;
struct sigaction *ap, copy;
if (!(0 < sig && sig < NSIG) || sig == SIGKILL || sig == SIGSTOP) {

2
libc/calls/unlink_s.c

@ -21,7 +21,7 @@
#include "libc/calls/calls.h"
/**
* Deletes file, the Cosmopolitan way.
* Deletes file.
*
* The caller's variable is made NULL. Note that we define unlink(NULL)
* as a no-op.

1
libc/fmt/conv.h

@ -19,7 +19,6 @@ long labs(long) libcesque pureconst;
long long llabs(long long) libcesque pureconst;
char *ltpcpy(char *, long) paramsnonnull() libcesque nocallback;
int llog10(unsigned long) libcesque pureconst;
int unsleb128(const void *, size_t, int64_t *);
int atoi(const char *) paramsnonnull() libcesque;
long atol(const char *) paramsnonnull() libcesque;
long long atoll(const char *) paramsnonnull() libcesque;

4
libc/fmt/dirname.c

@ -22,6 +22,10 @@
#define ISDELIM(c) (c == '/' || c == '\\' || c == '.')
/**
* Returns directory portion of path.
* @param s is mutated
*/
char *dirname(char *s) {
size_t i, n;
if (!(n = strlen(s))) return s;

11
libc/fmt/leb128.h

@ -0,0 +1,11 @@
#ifndef COSMOPOLITAN_LIBC_FMT_LEB128_H_
#define COSMOPOLITAN_LIBC_FMT_LEB128_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/* Encodes signed integer into buffer as signed LEB-128; returns byte count. */
int sleb128(const void *, size_t, int128_t);
/* Decodes signed LEB-128 from buffer into *out; returns bytes consumed. */
int unsleb128(const void *, size_t, int128_t *);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_FMT_LEB128_H_ */

2
libc/fmt/pflink.h

@ -46,7 +46,7 @@
#define ___PFLINK(FMT, FN, C) 1
#else
#define ___PFLINK(FMT, FN, C) \
!isconstant(FMT) || ((FMT) && __builtin_##FN(FMT, C) != NULL)
!__builtin_constant_p(FMT) || ((FMT) && __builtin_##FN(FMT, C) != NULL)
#endif
#if defined(__GNUC__) && __GNUC__ < 6

39
libc/fmt/sleb128.c

@ -0,0 +1,39 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/fmt/leb128.h"
/**
* Encodes sleb-128 signed integer.
*/
/**
 * Encodes signed integer to buffer in signed LEB-128 (DWARF varint) form.
 *
 * Each output byte carries 7 payload bits; the high bit is set on every
 * byte except the last, which terminates the sequence.
 *
 * @param buf receives encoded bytes (written through a cast; the const
 *     qualifier is kept only for interface compatibility)
 * @param size is the capacity of buf in bytes
 * @param x is the value to encode
 * @return number of bytes written, including the terminating byte
 * @note fix: the original skipped storing the final (terminator) byte —
 *     the break fired before the store, so e.g. encoding 0 wrote nothing
 */
int sleb128(const void *buf, size_t size, int128_t x) {
  int c;
  unsigned i;
  for (i = 0; i < size; ++i) {
    c = x & 0x7f;
    x >>= 7; /* arithmetic shift: sign-extends negative values toward -1 */
    if ((x == 0 && !(c & 0x40)) || (x == -1 && (c & 0x40))) {
      /* value fully consumed and sign bit agrees: emit final byte
         with continuation bit clear, then stop */
      ((char *)buf)[i] = c;
      return i + 1;
    }
    ((char *)buf)[i] = c | 0x80; /* more bytes follow */
  }
  return i; /* buffer exhausted before encoding completed */
}

2
libc/fmt/strerror.c

@ -24,7 +24,7 @@
* @see strerror_r()
*/
char *strerror(int err) {
alignas(1) static char buf[512];
_Alignas(1) static char buf[512];
strerror_r(err, buf, sizeof(buf));
return buf;
}

6
libc/fmt/unsleb128.c

@ -17,7 +17,7 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/fmt/conv.h"
#include "libc/fmt/leb128.h"
/**
* Decodes a GNU-style varint from a buffer.
@ -25,9 +25,9 @@
* The GNU Assembler is able to encode numbers this way, since it's used
* by the DWARF debug format.
*/
int unsleb128(const void *buf, size_t size, int64_t *out) {
int unsleb128(const void *buf, size_t size, int128_t *out) {
int b;
int64_t r, w;
int128_t r, w;
unsigned char c;
const unsigned char *p, *pe;
pe = (p = buf) + size;

52
libc/integral/c.inc

@ -806,36 +806,6 @@ typedef uint64_t uintmax_t;
do { \
} while (0)
#ifndef likely
#define likely(expr) __builtin_expect(!!(expr), 1)
#endif
#ifndef unlikely
#define unlikely(expr) __builtin_expect(!!(expr), 0)
#endif
/**
* Evaluates ternary expression without type promotion.
*/
#ifndef chooseexpr
#define chooseexpr(pred, a, b) __builtin_choose_expr(pred, a, b)
#endif
/**
* Returns true if expression can be evaluated at compile-time.
*/
#ifndef isconstant
#define isconstant(expr) __builtin_constant_p(expr)
#endif
#ifndef static_assert
#define static_assert(expr) _Static_assert(expr, #expr)
#endif
#ifndef typescompatible
#define typescompatible(a, b) __builtin_types_compatible_p(a, b)
#endif
#ifndef __STRICT_ANSI__
#define testonly noinline _Section(".test")
#define textstartup _Section(".text.startup") noinstrument
@ -873,10 +843,6 @@ typedef uint64_t uintmax_t;
#define offsetof(type, member) __builtin_offsetof(type, member)
#endif
#ifndef alignas
#define alignas(x) _Alignas(x)
#endif
#ifndef _Section
#ifndef __STRICT_ANSI__
#define _Section(s) __attribute__((__section__(s)))
@ -1029,15 +995,15 @@ typedef uint64_t uintmax_t;
* Pulls another module, by symbol, into linkage.
* @note nop is discarded by ape/ape.lds
*/
#define YOINK(SYMBOL) \
do { \
_Static_assert(!typescompatible(typeof(SYMBOL), char[]), \
"Please YOINK(symbol), not YOINK(\"symbol\")"); \
asm(".pushsection .yoink\n\t" \
"nop\t%a0\n\t" \
".popsection" \
: /* no outputs */ \
: "X"(SYMBOL)); \
#define YOINK(SYMBOL) \
do { \
_Static_assert(!__builtin_types_compatible_p(typeof(SYMBOL), char[]), \
"Please YOINK(symbol), not YOINK(\"symbol\")"); \
asm(".pushsection .yoink\n\t" \
"nop\t%a0\n\t" \
".popsection" \
: /* no outputs */ \
: "X"(SYMBOL)); \
} while (0)
/**

5
libc/intrin/mpsadbw.h

@ -10,11 +10,12 @@ void mpsadbw(uint16_t[8], const uint8_t[16], const uint8_t[16], uint8_t);
__intrin_xmm_t __mpsadbws(__intrin_xmm_t, __intrin_xmm_t);
#define mpsadbw(C, B, A, I) \
do { \
if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE4_1))) { \
if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE4_1), \
1)) { \
__intrin_xmm_t *Xmm0 = (void *)(C); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
if (isconstant(I)) { \
if (__builtin_constant_p(I)) { \
if (!X86_NEED(AVX)) { \
asm("mpsadbw\t%2,%1,%0" \
: "=x"(*Xmm0) \

59
libc/intrin/palignr.h

@ -9,35 +9,36 @@ void palignr(void *, const void *, const void *, unsigned long);
#if !defined(__STRICT_ANSI__) && !defined(__chibicc__)
__intrin_xmm_t __palignrs(__intrin_xmm_t, __intrin_xmm_t);
#define palignr(C, B, A, I) \
do { \
if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3))) { \
__intrin_xmm_t *Xmm0 = (void *)(C); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
if (isconstant(I)) { \
if (!X86_NEED(AVX)) { \
asm("palignr\t%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpalignr\t%3,%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__palignrs) *Fn; \
if (likely(Vimm < 32)) { \
Fn = (typeof(__palignrs) *)((uintptr_t)&__palignrs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1, *Xmm2); \
} else { \
memset(Xmm0, 0, 16); \
} \
} \
} else { \
palignr(C, B, A, I); \
} \
#define palignr(C, B, A, I) \
do { \
if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3), \
1)) { \
__intrin_xmm_t *Xmm0 = (void *)(C); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
if (__builtin_constant_p(I)) { \
if (!X86_NEED(AVX)) { \
asm("palignr\t%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpalignr\t%3,%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__palignrs) *Fn; \
if (__builtin_expect(Vimm < 32, 1)) { \
Fn = (typeof(__palignrs) *)((uintptr_t)&__palignrs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1, *Xmm2); \
} else { \
memset(Xmm0, 0, 16); \
} \
} \
} else { \
palignr(C, B, A, I); \
} \
} while (0)
#endif

43
libc/intrin/pslldq.h

@ -8,27 +8,28 @@ void pslldq(uint8_t[16], const uint8_t[16], unsigned long);
#ifndef __STRICT_ANSI__
__intrin_xmm_t __pslldqs(__intrin_xmm_t);
#define pslldq(B, A, I) \
do { \
if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE2))) { \
__intrin_xmm_t *Xmm0 = (void *)(B); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(A); \
if (isconstant(I)) { \
if (!X86_NEED(AVX)) { \
asm("pslldq\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpslldq\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__pslldqs) *Fn; \
if (Vimm > 16) Vimm = 16; \
Fn = (typeof(__pslldqs) *)((uintptr_t)&__pslldqs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1); \
} \
} else { \
pslldq(B, A, I); \
} \
#define pslldq(B, A, I) \
do { \
if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE2), \
1)) { \
__intrin_xmm_t *Xmm0 = (void *)(B); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(A); \
if (__builtin_constant_p(I)) { \
if (!X86_NEED(AVX)) { \
asm("pslldq\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpslldq\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__pslldqs) *Fn; \
if (Vimm > 16) Vimm = 16; \
Fn = (typeof(__pslldqs) *)((uintptr_t)&__pslldqs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1); \
} \
} else { \
pslldq(B, A, I); \
} \
} while (0)
#endif

43
libc/intrin/psrldq.h

@ -8,27 +8,28 @@ void psrldq(uint8_t[16], const uint8_t[16], unsigned long);
#ifndef __STRICT_ANSI__
__intrin_xmm_t __psrldqs(__intrin_xmm_t);
#define psrldq(B, A, I) \
do { \
if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE2))) { \
__intrin_xmm_t *Xmm0 = (void *)(B); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(A); \
if (isconstant(I)) { \
if (!X86_NEED(AVX)) { \
asm("psrldq\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpsrldq\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__psrldqs) *Fn; \
if (Vimm > 16) Vimm = 16; \
Fn = (typeof(__psrldqs) *)((uintptr_t)&__psrldqs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1); \
} \
} else { \
psrldq(B, A, I); \
} \
#define psrldq(B, A, I) \
do { \
if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE2), \
1)) { \
__intrin_xmm_t *Xmm0 = (void *)(B); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(A); \
if (__builtin_constant_p(I)) { \
if (!X86_NEED(AVX)) { \
asm("psrldq\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpsrldq\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__psrldqs) *Fn; \
if (Vimm > 16) Vimm = 16; \
Fn = (typeof(__psrldqs) *)((uintptr_t)&__psrldqs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1); \
} \
} else { \
psrldq(B, A, I); \
} \
} while (0)
#endif

5
libc/log/log.h

@ -56,8 +56,9 @@ bool isrunningundermake(void);
extern unsigned g_loglevel; /* log level for runtime check */
#define LOGGABLE(LEVEL) \
((!isconstant(LEVEL) || (LEVEL) <= LOGGABLELEVEL) && (LEVEL) <= g_loglevel)
#define LOGGABLE(LEVEL) \
((!__builtin_constant_p(LEVEL) || (LEVEL) <= LOGGABLELEVEL) && \
(LEVEL) <= g_loglevel)
#define LOGF(FMT, ...) \
do { \

21
libc/macros-cpp.internal.inc

@ -1,5 +1,24 @@
/* clang-format off */
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-
vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "ape/relocations.h"
/* clang-format off */
#if __MNO_VZEROUPPER__ + 0
#define vzeroupper

5
libc/macros.h

@ -13,6 +13,9 @@
#define TRUE 1
#define FALSE 0
#define alignas(x) _Alignas(x)
#define static_assert(x) _Static_assert(x, #x)
#define ROUNDUP(X, K) (((X) + (K)-1) & -(K))
#define ROUNDDOWN(X, K) ((X) & -(K))
#define ABS(X) ((X) >= 0 ? (X) : -(X))
@ -20,7 +23,7 @@
#define MAX(X, Y) ((Y) < (X) ? (X) : (Y))
#define PASTE(A, B) __PASTE(A, B)
#define STRINGIFY(A) __STRINGIFY(A)
#define EQUIVALENT(X, Y) (isconstant((X) == (Y)) && ((X) == (Y)))
#define EQUIVALENT(X, Y) (__builtin_constant_p((X) == (Y)) && ((X) == (Y)))
#define TYPE_BIT(type) (sizeof(type) * CHAR_BIT)
#define TYPE_SIGNED(type) (((type)-1) < 0)
#define TYPE_INTEGRAL(type) (((type)0.5) != 0.5)

20
libc/macros.internal.inc

@ -1,3 +1,23 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-
vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
/ Shorthand notation for widely-acknowledged sections.
.macro .rodata
.section .rodata,"a",@progbits

2
libc/nexgen32e/crc32.h

@ -5,7 +5,7 @@ COSMOPOLITAN_C_START_
void crc32init(uint32_t[hasatleast 256], uint32_t);
uint32_t crc32_z(uint32_t, const void *, size_t);
extern uint32_t (*const crc32c)(uint32_t, const void *, size_t) paramsnonnull();
extern uint32_t (*const crc32c)(uint32_t, const void *, size_t);
uint32_t crc32c$pure(uint32_t, const void *, size_t) strlenesque hidden;
uint32_t crc32c$sse42(uint32_t, const void *, size_t) strlenesque hidden;
uint32_t crc32$pclmul(uint32_t, const void *, size_t) hidden;

2
libc/nexgen32e/crc32c-sse42.c

@ -23,7 +23,7 @@
* Hashes data with hardware acceleration at 10GBps.
* @note needs Nehalem+ c. 2008 or Bulldozer+ c. 2011
*/
uint32_t crc32c$sse42(uint32_t init, const void *data, size_t n) {
optimizespeed uint32_t crc32c$sse42(uint32_t init, const void *data, size_t n) {
const unsigned char *p = (const unsigned char *)data;
const unsigned char *pe = (const unsigned char *)data + n;
uint32_t h = init ^ 0xffffffff;

467
libc/nexgen32e/memmove.inc

@ -1,467 +0,0 @@
/*
Copyright (c) 2014, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
.ident "\n
memmove (Licensed BSD-3)\n
Copyright 2014 Intel Corporation"
.include "libc/disclaimer.inc"
#ifndef L
# define L(label) .L##label
#endif
#ifndef SHARED_CACHE_SIZE_HALF
#define SHARED_CACHE_SIZE_HALF (4 * 1024 * 1024)
#endif
push %rbx
push %rdx
push %r8
push %r9
/* Check whether we should copy backward or forward. */
cmp %rsi, %rdi
je L(mm_return)
jg L(mm_len_0_or_more_backward)
/* Now do checks for lengths. We do [0..16], [0..32], [0..64], [0..128]
separately. */
cmp $16, %rdx
jbe L(mm_len_0_16_bytes_forward)
cmp $32, %rdx
ja L(mm_len_32_or_more_forward)
/* Copy [0..32] and return. */
movdqu (%rsi), %xmm0
movdqu -16(%rsi, %rdx), %xmm1
movdqu %xmm0, (%rdi)
movdqu %xmm1, -16(%rdi, %rdx)
jmp L(mm_return)
L(mm_len_32_or_more_forward):
cmp $64, %rdx
ja L(mm_len_64_or_more_forward)
/* Copy [0..64] and return. */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm1
movdqu -16(%rsi, %rdx), %xmm2
movdqu -32(%rsi, %rdx), %xmm3
movdqu %xmm0, (%rdi)
movdqu %xmm1, 16(%rdi)
movdqu %xmm2, -16(%rdi, %rdx)
movdqu %xmm3, -32(%rdi, %rdx)
jmp L(mm_return)
L(mm_len_64_or_more_forward):
cmp $128, %rdx
ja L(mm_len_128_or_more_forward)
/* Copy [0..128] and return. */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm1
movdqu 32(%rsi), %xmm2
movdqu 48(%rsi), %xmm3
movdqu -64(%rsi, %rdx), %xmm4
movdqu -48(%rsi, %rdx), %xmm5
movdqu -32(%rsi, %rdx), %xmm6
movdqu -16(%rsi, %rdx), %xmm7
movdqu %xmm0, (%rdi)
movdqu %xmm1, 16(%rdi)
movdqu %xmm2, 32(%rdi)
movdqu %xmm3, 48(%rdi)
movdqu %xmm4, -64(%rdi, %rdx)
movdqu %xmm5, -48(%rdi, %rdx)
movdqu %xmm6, -32(%rdi, %rdx)
movdqu %xmm7, -16(%rdi, %rdx)
jmp L(mm_return)
L(mm_len_128_or_more_forward):
/* Aligning the address of destination. */
/* save first unaligned 64 bytes */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm1
movdqu 32(%rsi), %xmm2
movdqu 48(%rsi), %xmm3
lea 64(%rdi), %r8
and $-64, %r8 /* r8 now aligned to next 64 byte boundary */
sub %rdi, %rsi /* rsi = src - dst = diff */
movdqu (%r8, %rsi), %xmm4
movdqu 16(%r8, %rsi), %xmm5
movdqu 32(%r8, %rsi), %xmm6
movdqu 48(%r8, %rsi), %xmm7
movdqu %xmm0, (%rdi)
movdqu %xmm1, 16(%rdi)
movdqu %xmm2, 32(%rdi)
movdqu %xmm3, 48(%rdi)
movdqa %xmm4, (%r8)
movaps %xmm5, 16(%r8)
movaps %xmm6, 32(%r8)
movaps %xmm7, 48(%r8)
add $64, %r8
lea (%rdi, %rdx), %rbx
and $-64, %rbx
cmp %r8, %rbx
jbe L(mm_copy_remaining_forward)
cmp $SHARED_CACHE_SIZE_HALF, %rdx
jae L(mm_large_page_loop_forward)
.p2align 4
L(mm_main_loop_forward):
prefetcht0 128(%r8, %rsi)
movdqu (%r8, %rsi), %xmm0
movdqu 16(%r8, %rsi), %xmm1
movdqu 32(%r8, %rsi), %xmm2
movdqu 48(%r8, %rsi), %xmm3
movdqa %xmm0, (%r8)
movaps %xmm1, 16(%r8)
movaps %xmm2, 32(%r8)
movaps %xmm3, 48(%r8)
lea 64(%r8), %r8
cmp %r8, %rbx
ja L(mm_main_loop_forward)
L(mm_copy_remaining_forward):
add %rdi, %rdx
sub %r8, %rdx
/* We copied all up till %rdi position in the dst.
In %rdx now is how many bytes are left to copy.
Now we need to advance %r8. */
lea (%r8, %rsi), %r9
L(mm_remaining_0_64_bytes_forward):
cmp $32, %rdx
ja L(mm_remaining_33_64_bytes_forward)
cmp $16, %rdx
ja L(mm_remaining_17_32_bytes_forward)
test %rdx, %rdx
.p2align 4,,2
je L(mm_return)
cmpb $8, %dl
ja L(mm_remaining_9_16_bytes_forward)
cmpb $4, %dl
.p2align 4,,5
ja L(mm_remaining_5_8_bytes_forward)
cmpb $2, %dl
.p2align 4,,1
ja L(mm_remaining_3_4_bytes_forward)
movzbl -1(%r9,%rdx), %esi
movzbl (%r9), %ebx
movb %sil, -1(%r8,%rdx)
movb %bl, (%r8)
jmp L(mm_return)
L(mm_remaining_33_64_bytes_forward):
movdqu (%r9), %xmm0
movdqu 16(%r9), %xmm1
movdqu -32(%r9, %rdx), %xmm2
movdqu -16(%r9, %rdx), %xmm3
movdqu %xmm0, (%r8)
movdqu %xmm1, 16(%r8)
movdqu %xmm2, -32(%r8, %rdx)
movdqu %xmm3, -16(%r8, %rdx)
jmp L(mm_return)
L(mm_remaining_17_32_bytes_forward):
movdqu (%r9), %xmm0
movdqu -16(%r9, %rdx), %xmm1
movdqu %xmm0, (%r8)
movdqu %xmm1, -16(%r8, %rdx)
jmp L(mm_return)
L(mm_remaining_5_8_bytes_forward):
movl (%r9), %esi
movl -4(%r9,%rdx), %ebx
movl %esi, (%r8)
movl %ebx, -4(%r8,%rdx)
jmp L(mm_return)
L(mm_remaining_9_16_bytes_forward):
mov (%r9), %rsi
mov -8(%r9, %rdx), %rbx
mov %rsi, (%r8)
mov %rbx, -8(%r8, %rdx)
jmp L(mm_return)
L(mm_remaining_3_4_bytes_forward):
movzwl -2(%r9,%rdx), %esi
movzwl (%r9), %ebx
movw %si, -2(%r8,%rdx)
movw %bx, (%r8)
jmp L(mm_return)
L(mm_len_0_16_bytes_forward):
testb $24, %dl
jne L(mm_len_9_16_bytes_forward)
testb $4, %dl
.p2align 4,,5
jne L(mm_len_5_8_bytes_forward)
test %rdx, %rdx
.p2align 4,,2
je L(mm_return)
testb $2, %dl
.p2align 4,,1
jne L(mm_len_2_4_bytes_forward)
movzbl -1(%rsi,%rdx), %ebx
movzbl (%rsi), %esi
movb %bl, -1(%rdi,%rdx)
movb %sil, (%rdi)
jmp L(mm_return)
L(mm_len_2_4_bytes_forward):
movzwl -2(%rsi,%rdx), %ebx
movzwl (%rsi), %esi
movw %bx, -2(%rdi,%rdx)
movw %si, (%rdi)
jmp L(mm_return)
L(mm_len_5_8_bytes_forward):
movl (%rsi), %ebx
movl -4(%rsi,%rdx), %esi
movl %ebx, (%rdi)
movl %esi, -4(%rdi,%rdx)
jmp L(mm_return)
L(mm_len_9_16_bytes_forward):
mov (%rsi), %rbx
mov -8(%rsi, %rdx), %rsi
mov %rbx, (%rdi)
mov %rsi, -8(%rdi, %rdx)
jmp L(mm_return)
L(mm_recalc_len):
/* Compute in %rdx how many bytes are left to copy after
the main loop stops. */
mov %rbx, %rdx
sub %rdi, %rdx
/* The code for copying backwards. */
L(mm_len_0_or_more_backward):
/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
separately. */
cmp $16, %rdx
jbe L(mm_len_0_16_bytes_backward)
cmp $32, %rdx
ja L(mm_len_32_or_more_backward)
/* Copy [0..32] and return. */
movdqu (%rsi), %xmm0
movdqu -16(%rsi, %rdx), %xmm1
movdqu %xmm0, (%rdi)
movdqu %xmm1, -16(%rdi, %rdx)
jmp L(mm_return)
L(mm_len_32_or_more_backward):
cmp $64, %rdx
ja L(mm_len_64_or_more_backward)
/* Copy [0..64] and return. */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm1
movdqu -16(%rsi, %rdx), %xmm2
movdqu -32(%rsi, %rdx), %xmm3
movdqu %xmm0, (%rdi)
movdqu %xmm1, 16(%rdi)
movdqu %xmm2, -16(%rdi, %rdx)
movdqu %xmm3, -32(%rdi, %rdx)
jmp L(mm_return)
L(mm_len_64_or_more_backward):
cmp $128, %rdx
ja L(mm_len_128_or_more_backward)
/* Copy [0..128] and return. */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm1
movdqu 32(%rsi), %xmm2
movdqu 48(%rsi), %xmm3
movdqu -64(%rsi, %rdx), %xmm4
movdqu -48(%rsi, %rdx), %xmm5
movdqu -32(%rsi, %rdx), %xmm6
movdqu -16(%rsi, %rdx), %xmm7
movdqu %xmm0, (%rdi)
movdqu %xmm1, 16(%rdi)
movdqu %xmm2, 32(%rdi)
movdqu %xmm3, 48(%rdi)
movdqu %xmm4, -64(%rdi, %rdx)
movdqu %xmm5, -48(%rdi, %rdx)
movdqu %xmm6, -32(%rdi, %rdx)
movdqu %xmm7, -16(%rdi, %rdx)
jmp L(mm_return)
L(mm_len_128_or_more_backward):
/* Aligning the address of destination. We need to save
	the last 64 bytes of the source in order not to overwrite them. */
movdqu -16(%rsi, %rdx), %xmm0
movdqu -32(%rsi, %rdx), %xmm1
movdqu -48(%rsi, %rdx), %xmm2
movdqu -64(%rsi, %rdx), %xmm3
lea (%rdi, %rdx), %r9
and $-64, %r9 /* r9 = aligned dst */
mov %rsi, %r8
sub %rdi, %r8 /* r8 = src - dst, diff */
movdqu -16(%r9, %r8), %xmm4
movdqu -32(%r9, %r8), %xmm5
movdqu -48(%r9, %r8), %xmm6
movdqu -64(%r9, %r8), %xmm7
movdqu %xmm0, -16(%rdi, %rdx)
movdqu %xmm1, -32(%rdi, %rdx)
movdqu %xmm2, -48(%rdi, %rdx)
movdqu %xmm3, -64(%rdi, %rdx)
movdqa %xmm4, -16(%r9)
movaps %xmm5, -32(%r9)
movaps %xmm6, -48(%r9)
movaps %xmm7, -64(%r9)
lea -64(%r9), %r9
lea 64(%rdi), %rbx
and $-64, %rbx
cmp %r9, %rbx
jae L(mm_recalc_len)
cmp $SHARED_CACHE_SIZE_HALF, %rdx
jae L(mm_large_page_loop_backward)
.p2align 4
L(mm_main_loop_backward):
prefetcht0 -128(%r9, %r8)
movdqu -64(%r9, %r8), %xmm0
movdqu -48(%r9, %r8), %xmm1
movdqu -32(%r9, %r8), %xmm2
movdqu -16(%r9, %r8), %xmm3
movdqa %xmm0, -64(%r9)
movaps %xmm1, -48(%r9)
movaps %xmm2, -32(%r9)
movaps %xmm3, -16(%r9)
lea -64(%r9), %r9
cmp %r9, %rbx
jb L(mm_main_loop_backward)
jmp L(mm_recalc_len)
/* Copy [0..16] and return. */
L(mm_len_0_16_bytes_backward):
testb $24, %dl
jnz L(mm_len_9_16_bytes_backward)
testb $4, %dl
.p2align 4,,5
jnz L(mm_len_5_8_bytes_backward)
test %rdx, %rdx
.p2align 4,,2
je L(mm_return)
testb $2, %dl
.p2align 4,,1
jne L(mm_len_3_4_bytes_backward)
movzbl -1(%rsi,%rdx), %ebx
movzbl (%rsi), %ecx
movb %bl, -1(%rdi,%rdx)
movb %cl, (%rdi)
jmp L(mm_return)
L(mm_len_3_4_bytes_backward):
movzwl -2(%rsi,%rdx), %ebx
movzwl (%rsi), %ecx
movw %bx, -2(%rdi,%rdx)
movw %cx, (%rdi)
jmp L(mm_return)
L(mm_len_9_16_bytes_backward):
movl -4(%rsi,%rdx), %ebx
movl -8(%rsi,%rdx), %ecx
movl %ebx, -4(%rdi,%rdx)
movl %ecx, -8(%rdi,%rdx)
sub $8, %rdx
jmp L(mm_len_0_16_bytes_backward)
L(mm_len_5_8_bytes_backward):
movl (%rsi), %ebx
movl -4(%rsi,%rdx), %ecx
movl %ebx, (%rdi)
movl %ecx, -4(%rdi,%rdx)
L(mm_return):
pop %r9
pop %r8
pop %rdx
pop %rbx
pop %rbp
ret
/* Big length copy forward part. */
.p2align 4
L(mm_large_page_loop_forward):
movdqu (%r8, %rsi), %xmm0
movdqu 16(%r8, %rsi), %xmm1
movdqu 32(%r8, %rsi), %xmm2
movdqu 48(%r8, %rsi), %xmm3
movntdq %xmm0, (%r8)
movntdq %xmm1, 16(%r8)
movntdq %xmm2, 32(%r8)
movntdq %xmm3, 48(%r8)
lea 64(%r8), %r8
cmp %r8, %rbx
ja L(mm_large_page_loop_forward)
sfence
jmp L(mm_copy_remaining_forward)
/* Big length copy backward part. */
.p2align 4
L(mm_large_page_loop_backward):
movdqu -64(%r9, %r8), %xmm0
movdqu -48(%r9, %r8), %xmm1
movdqu -32(%r9, %r8), %xmm2
movdqu -16(%r9, %r8), %xmm3
movntdq %xmm0, -64(%r9)
movntdq %xmm1, -48(%r9)
movntdq %xmm2, -32(%r9)
movntdq %xmm3, -16(%r9)
lea -64(%r9), %r9
cmp %r9, %rbx
jb L(mm_large_page_loop_backward)
sfence
jmp L(mm_recalc_len)

52
libc/nexgen32e/strlen.S

@ -0,0 +1,52 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for