Browse Source

Make minor improvements

main
Justine Tunney 10 months ago
parent
commit
95b142e4e5
  1. 1
      dsp/scale/gyarados.c
  2. 3
      examples/unbourne.c
  3. 13
      libc/bits/bits.h
  4. 36
      libc/bits/bswap.h
  5. 1
      libc/bits/popcnt.c
  6. 2
      libc/bits/popcnt.h
  7. 62
      libc/bits/pushpop.h
  8. 8
      libc/calls/calls.h
  9. 2
      libc/calls/clock_gettime.c
  10. 2
      libc/calls/isdebuggerpresent.c
  11. 10
      libc/calls/sigaction.c
  12. 2
      libc/calls/unlink_s.c
  13. 1
      libc/fmt/conv.h
  14. 4
      libc/fmt/dirname.c
  15. 11
      libc/fmt/leb128.h
  16. 2
      libc/fmt/pflink.h
  17. 39
      libc/fmt/sleb128.c
  18. 2
      libc/fmt/strerror.c
  19. 6
      libc/fmt/unsleb128.c
  20. 52
      libc/integral/c.inc
  21. 5
      libc/intrin/mpsadbw.h
  22. 59
      libc/intrin/palignr.h
  23. 43
      libc/intrin/pslldq.h
  24. 43
      libc/intrin/psrldq.h
  25. 5
      libc/log/log.h
  26. 21
      libc/macros-cpp.internal.inc
  27. 5
      libc/macros.h
  28. 20
      libc/macros.internal.inc
  29. 2
      libc/nexgen32e/crc32.h
  30. 2
      libc/nexgen32e/crc32c-sse42.c
  31. 467
      libc/nexgen32e/memmove.inc
  32. 52
      libc/nexgen32e/strlen.S
  33. 4
      libc/sock/bind.c
  34. 3
      libc/sock/connect-sysv.c
  35. 2
      libc/sock/sendto.c
  36. 24
      libc/stdio/fputc.c
  37. 53
      libc/stdio/fputcfb.c
  38. 2
      libc/stdio/fputs.c
  39. 4
      libc/stdio/fread.c
  40. 4
      libc/stdio/g_stdbuf.c
  41. 2
      libc/stdio/stdio.h
  42. 2
      libc/str/chomp.c
  43. 3
      libc/str/decodentsutf16.c
  44. 4
      libc/str/hextoint.c
  45. 3
      libc/str/isgraph.c
  46. 3
      libc/str/isprint.c
  47. 3
      libc/str/ispunct.c
  48. 2
      libc/str/mbtowc.c
  49. 67
      libc/str/str.h
  50. 3
      libc/str/strlcpy.c
  51. 2
      libc/str/strsignal.c
  52. 10
      libc/str/tpdecodecb.internal.h
  53. 2
      libc/str/tpencode.ncabi.c
  54. 15
      libc/str/utf16.h
  55. 6
      libc/testlib/ugly.h
  56. 1
      libc/x/x.h
  57. 28
      libc/x/xdirname.c
  58. 17
      libc/x/xjoinpaths.c
  59. 10
      test/libc/fmt/basename_test.c
  60. 34
      test/libc/fmt/dirname_test.c
  61. 33
      test/libc/stdio/fputs_test.c
  62. 36
      test/libc/stdio/fread_test.c
  63. 47
      test/libc/str/bsr_test.c
  64. 8
      test/libc/str/strlen_test.c
  65. 31
      test/libc/x/xjoinpaths_test.c
  66. 2
      test/net/http/uricspn_test.c
  67. 1
      third_party/chibicc/README.cosmo
  68. 3844
      third_party/chibicc/as.c
  69. 3
      third_party/chibicc/asm.c
  70. 8
      third_party/chibicc/chibicc.c
  71. 5
      third_party/chibicc/chibicc.h
  72. 13
      third_party/chibicc/codegen.c
  73. 3
      third_party/chibicc/hashmap.c
  74. 34
      third_party/chibicc/hog.s
  75. 15
      third_party/chibicc/parse.c
  76. 2
      third_party/chibicc/preprocess.c
  77. 23
      third_party/chibicc/test/bitfield_test.c
  78. 50
      third_party/chibicc/test/dce_test.c
  79. 802
      third_party/chibicc/test/initializer_test.c
  80. 4
      third_party/chibicc/test/sizeof_test.c
  81. 2
      third_party/chibicc/tokenize.c
  82. 4
      third_party/chibicc/type.c
  83. 183
      third_party/compiler_rt/clear_cache.c
  84. 51
      third_party/compiler_rt/trampoline_setup.c
  85. 4
      third_party/duktape/duk_config.h
  86. 1
      tool/build/build.mk
  87. 18
      tool/build/lib/elfwriter.c
  88. 1
      tool/build/lib/elfwriter.h
  89. 6
      tool/build/lib/interner.c
  90. 2
      tool/build/lib/iovs.c
  91. 2
      tool/build/mkdeps.c
  92. 20
      tool/decode/elf.c
  93. 3
      tool/emacs/c.lang
  94. 5
      tool/emacs/cosmo-c-builtins.el
  95. 2
      tool/emacs/cosmo-stuff.el

1
dsp/scale/gyarados.c

@ -74,6 +74,7 @@ static struct SamplingSolution *NewSamplingSolution(long n, long s) {
ss->indices = xcalloc(n * s, sizeof(short));
return ss;
}
static bool IsNormalized(int n, double A[n]) {
int i;
double x;

3
examples/unbourne.c

@ -144,6 +144,9 @@
#include "third_party/gdtoa/gdtoa.h"
#include "third_party/musl/passwd.h"
#define likely(expr) __builtin_expect(!!(expr), 1)
#define unlikely(expr) __builtin_expect(!!(expr), 0)
#undef CEOF
#undef rflag

13
libc/bits/bits.h

@ -263,11 +263,12 @@ unsigned long hamming(unsigned long, unsigned long) pureconst;
* @return LOCALVAR[0]
* @see xchg()
*/
#define lockxchg(MEMORY, LOCALVAR) \
({ \
static_assert(typescompatible(typeof(*(MEMORY)), typeof(*(LOCALVAR)))); \
asm("xchg\t%0,%1" : "+%m"(*(MEMORY)), "+r"(*(LOCALVAR))); \
*(LOCALVAR); \
#define lockxchg(MEMORY, LOCALVAR) \
({ \
_Static_assert( \
__builtin_types_compatible_p(typeof(*(MEMORY)), typeof(*(LOCALVAR)))); \
asm("xchg\t%0,%1" : "+%m"(*(MEMORY)), "+r"(*(LOCALVAR))); \
*(LOCALVAR); \
})
/**
@ -376,7 +377,7 @@ unsigned long hamming(unsigned long, unsigned long) pureconst;
#define __BitOp(OP, BIT, MEM) \
({ \
bool OldBit; \
if (isconstant(BIT)) { \
if (__builtin_constant_p(BIT)) { \
asm(CFLAG_ASM(OP "%z1\t%2,%1") \
: CFLAG_CONSTRAINT(OldBit), \
"+m"((MEM)[(BIT) / (sizeof((MEM)[0]) * CHAR_BIT)]) \

36
libc/bits/bswap.h

@ -8,39 +8,9 @@ uint32_t bswap_32(uint32_t) pureconst;
uint32_t bswap_64(uint32_t) pureconst;
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define bswap_16(U16) \
(isconstant(U16) ? ((((U16)&0xff00) >> 010) | (((U16)&0x00ff) << 010)) : ({ \
uint16_t Swapped16, Werd16 = (U16); \
asm("xchg\t%b0,%h0" : "=Q"(Swapped16) : "0"(Werd16)); \
Swapped16; \
}))
#define bswap_32(U32) \
(isconstant(U32) \
? ((((U32)&0xff000000) >> 030) | (((U32)&0x000000ff) << 030) | \
(((U32)&0x00ff0000) >> 010) | (((U32)&0x0000ff00) << 010)) \
: ({ \
uint32_t Swapped32, Werd32 = (U32); \
asm("bswap\t%0" : "=r"(Swapped32) : "0"(Werd32)); \
Swapped32; \
}))
#define bswap_64(U64) \
(isconstant(U64) ? ((((U64)&0xff00000000000000ul) >> 070) | \
(((U64)&0x00000000000000fful) << 070) | \
(((U64)&0x00ff000000000000ul) >> 050) | \
(((U64)&0x000000000000ff00ul) << 050) | \
(((U64)&0x0000ff0000000000ul) >> 030) | \
(((U64)&0x0000000000ff0000ul) << 030) | \
(((U64)&0x000000ff00000000ul) >> 010) | \
(((U64)&0x00000000ff000000ul) << 010)) \
: ({ \
uint64_t Swapped64, Werd64 = (U64); \
asm("bswap\t%0" : "=r"(Swapped64) : "0"(Werd64)); \
Swapped64; \
}))
#define bswap_16(x) __builtin_bswap16(x)
#define bswap_32(x) __builtin_bswap32(x)
#define bswap_64(x) __builtin_bswap64(x)
#endif /* defined(__GNUC__) && !defined(__STRICT_ANSI__) */
COSMOPOLITAN_C_END_

1
libc/bits/popcnt.c

@ -20,7 +20,6 @@
#include "libc/bits/popcnt.h"
uint64_t(popcnt)(uint64_t x) {
uint32_t r;
x = x - ((x >> 1) & 0x5555555555555555);
x = ((x >> 2) & 0x3333333333333333) + (x & 0x3333333333333333);
x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;

2
libc/bits/popcnt.h

@ -8,7 +8,7 @@ unsigned long popcnt(unsigned long) pureconst;
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define popcnt(X) \
(isconstant(X) ? __builtin_popcountll(X) : ({ \
(__builtin_constant_p(X) ? __builtin_popcountll(X) : ({ \
unsigned long Res, Pop = (X); \
if (X86_HAVE(POPCNT)) { \
asm("popcnt\t%1,%0" : "=r"(Res) : "r"(Pop) : "cc"); \

62
libc/bits/pushpop.h

@ -10,42 +10,44 @@
#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#define pushpop(x) (x)
#else
#define pushpop(x) \
({ \
typeof(x) Popped; \
if (isconstant(x) && (TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
: (intptr_t)(x) < 128)) { \
if (x) { \
asm("push\t%1\n\t" \
"pop\t%q0" \
: "=r"(Popped) \
: "ir"(x)); \
} else { \
asm("xor\t%k0,%k0" : "=r"(Popped)); \
} \
} else { \
asm("" : "=r"(Popped) : "0"(x)); \
} \
Popped; \
#define pushpop(x) \
({ \
typeof(x) Popped; \
if (__builtin_constant_p(x) && \
(TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
: (intptr_t)(x) < 128)) { \
if (x) { \
asm("push\t%1\n\t" \
"pop\t%q0" \
: "=r"(Popped) \
: "ir"(x)); \
} else { \
asm("xor\t%k0,%k0" : "=r"(Popped)); \
} \
} else { \
asm("" : "=r"(Popped) : "0"(x)); \
} \
Popped; \
})
#endif
#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#define pushmov(d, x) (*(d) = (x))
#else
#define pushmov(d, x) \
({ \
typeof(*(d)) Popped = (x); \
if (isconstant(x) && (TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
: (intptr_t)(x) < 128)) { \
asm("pushq\t%1\n\t" \
"popq\t%0" \
: "=m"(*(d)) \
: "ir"(Popped)); \
} else { \
*(d) = Popped; \
} \
Popped; \
#define pushmov(d, x) \
({ \
typeof(*(d)) Popped = (x); \
if (__builtin_constant_p(x) && \
(TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
: (intptr_t)(x) < 128)) { \
asm("pushq\t%1\n\t" \
"popq\t%0" \
: "=m"(*(d)) \
: "ir"(Popped)); \
} else { \
*(d) = Popped; \
} \
Popped; \
})
#endif

8
libc/calls/calls.h

@ -227,9 +227,9 @@ uint32_t gettid(void) nosideeffect;
uint32_t getuid(void) nosideeffect;
uint32_t umask(int32_t);
#define getcwd(BUF, SIZE) \
(isconstant(BUF) && (&(BUF)[0] == NULL) ? get_current_dir_name() \
: getcwd(BUF, SIZE))
#define getcwd(BUF, SIZE) \
(__builtin_constant_p(BUF) && (&(BUF)[0] == NULL) ? get_current_dir_name() \
: getcwd(BUF, SIZE))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § system calls » formatting
@ -249,7 +249,7 @@ void _init_wincrash(void);
#define __SIGACTION(FN, SIG, ...) \
({ \
if (SupportsWindows()) { \
if (isconstant(SIG)) { \
if (__builtin_constant_p(SIG)) { \
switch (SIG) { \
case SIGINT: \
case SIGQUIT: \

2
libc/calls/clock_gettime.c

@ -63,7 +63,7 @@ int clock_gettime(int clockid, struct timespec *out_ts) {
return clock_gettime$sysv(clockid, out_ts);
} else {
int rc;
static_assert(sizeof(struct timeval) == sizeof(struct timespec));
_Static_assert(sizeof(struct timeval) == sizeof(struct timespec));
if (out_ts) {
out_ts->tv_sec = 0;
out_ts->tv_nsec = 0;

2
libc/calls/isdebuggerpresent.c

@ -33,7 +33,7 @@
#define kBufSize 1024
#define kProcStatus "/proc/self/status"
alignas(16) static const char kGdbPid[] = "TracerPid:\t";
_Alignas(16) static const char kGdbPid[] = "TracerPid:\t";
/**
* Determines if gdb, strace, windbg, etc. is controlling process.

10
libc/calls/sigaction.c

@ -121,11 +121,11 @@ static void sigaction$native2cosmo(union metasigaction *sa) {
* @asyncsignalsafe
*/
int(sigaction)(int sig, const struct sigaction *act, struct sigaction *oldact) {
static_assert(sizeof(struct sigaction) > sizeof(struct sigaction$linux) &&
sizeof(struct sigaction) > sizeof(struct sigaction$xnu_in) &&
sizeof(struct sigaction) > sizeof(struct sigaction$xnu_out) &&
sizeof(struct sigaction) > sizeof(struct sigaction$freebsd) &&
sizeof(struct sigaction) > sizeof(struct sigaction$openbsd));
_Static_assert(sizeof(struct sigaction) > sizeof(struct sigaction$linux) &&
sizeof(struct sigaction) > sizeof(struct sigaction$xnu_in) &&
sizeof(struct sigaction) > sizeof(struct sigaction$xnu_out) &&
sizeof(struct sigaction) > sizeof(struct sigaction$freebsd) &&
sizeof(struct sigaction) > sizeof(struct sigaction$openbsd));
int rc, rva, oldrva;
struct sigaction *ap, copy;
if (!(0 < sig && sig < NSIG) || sig == SIGKILL || sig == SIGSTOP) {

2
libc/calls/unlink_s.c

@ -21,7 +21,7 @@
#include "libc/calls/calls.h"
/**
* Deletes file, the Cosmopolitan way.
* Deletes file.
*
* The caller's variable is made NULL. Note that we define unlink(NULL)
* as a no-op.

1
libc/fmt/conv.h

@ -19,7 +19,6 @@ long labs(long) libcesque pureconst;
long long llabs(long long) libcesque pureconst;
char *ltpcpy(char *, long) paramsnonnull() libcesque nocallback;
int llog10(unsigned long) libcesque pureconst;
int unsleb128(const void *, size_t, int64_t *);
int atoi(const char *) paramsnonnull() libcesque;
long atol(const char *) paramsnonnull() libcesque;
long long atoll(const char *) paramsnonnull() libcesque;

4
libc/fmt/dirname.c

@ -22,6 +22,10 @@
#define ISDELIM(c) (c == '/' || c == '\\' || c == '.')
/**
* Returns directory portion of path.
* @param s is mutated
*/
char *dirname(char *s) {
size_t i, n;
if (!(n = strlen(s))) return s;

11
libc/fmt/leb128.h

@ -0,0 +1,11 @@
#ifndef COSMOPOLITAN_LIBC_FMT_LEB128_H_
#define COSMOPOLITAN_LIBC_FMT_LEB128_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/* Encodes signed integer into buffer as signed LEB-128; returns byte count. */
int sleb128(const void *, size_t, int128_t);
/* Decodes signed LEB-128 from buffer into *out; returns bytes consumed. */
int unsleb128(const void *, size_t, int128_t *);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_FMT_LEB128_H_ */

2
libc/fmt/pflink.h

@ -46,7 +46,7 @@
#define ___PFLINK(FMT, FN, C) 1
#else
#define ___PFLINK(FMT, FN, C) \
!isconstant(FMT) || ((FMT) && __builtin_##FN(FMT, C) != NULL)
!__builtin_constant_p(FMT) || ((FMT) && __builtin_##FN(FMT, C) != NULL)
#endif
#if defined(__GNUC__) && __GNUC__ < 6

39
libc/fmt/sleb128.c

@ -0,0 +1,39 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/fmt/leb128.h"
/**
* Encodes sleb-128 signed integer.
*/
/**
 * Encodes signed integer to buffer in signed LEB-128 (DWARF varint) form.
 *
 * Each output byte carries 7 payload bits; the high bit is set on every
 * byte except the last, which terminates the sequence.
 *
 * @param buf receives encoded bytes (written through a cast; the const
 *     qualifier is kept only for interface compatibility)
 * @param size is the capacity of buf in bytes
 * @param x is the value to encode
 * @return number of bytes written, including the terminating byte
 * @note fix: the original skipped storing the final (terminator) byte —
 *     the break fired before the store, so e.g. encoding 0 wrote nothing
 */
int sleb128(const void *buf, size_t size, int128_t x) {
  int c;
  unsigned i;
  for (i = 0; i < size; ++i) {
    c = x & 0x7f;
    x >>= 7; /* arithmetic shift: sign-extends negative values toward -1 */
    if ((x == 0 && !(c & 0x40)) || (x == -1 && (c & 0x40))) {
      /* value fully consumed and sign bit agrees: emit final byte
         with continuation bit clear, then stop */
      ((char *)buf)[i] = c;
      return i + 1;
    }
    ((char *)buf)[i] = c | 0x80; /* more bytes follow */
  }
  return i; /* buffer exhausted before encoding completed */
}

2
libc/fmt/strerror.c

@ -24,7 +24,7 @@
* @see strerror_r()
*/
char *strerror(int err) {
alignas(1) static char buf[512];
_Alignas(1) static char buf[512];
strerror_r(err, buf, sizeof(buf));
return buf;
}

6
libc/fmt/unsleb128.c

@ -17,7 +17,7 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/fmt/conv.h"
#include "libc/fmt/leb128.h"
/**
* Decodes a GNU-style varint from a buffer.
@ -25,9 +25,9 @@
* The GNU Assembler is able to encode numbers this way, since it's used
* by the DWARF debug format.
*/
int unsleb128(const void *buf, size_t size, int64_t *out) {
int unsleb128(const void *buf, size_t size, int128_t *out) {
int b;
int64_t r, w;
int128_t r, w;
unsigned char c;
const unsigned char *p, *pe;
pe = (p = buf) + size;

52
libc/integral/c.inc

@ -806,36 +806,6 @@ typedef uint64_t uintmax_t;
do { \
} while (0)
#ifndef likely
#define likely(expr) __builtin_expect(!!(expr), 1)
#endif
#ifndef unlikely
#define unlikely(expr) __builtin_expect(!!(expr), 0)
#endif
/**
* Evaluates ternary expression without type promotion.
*/
#ifndef chooseexpr
#define chooseexpr(pred, a, b) __builtin_choose_expr(pred, a, b)
#endif
/**
* Returns true if expression can be evaluated at compile-time.
*/
#ifndef isconstant
#define isconstant(expr) __builtin_constant_p(expr)
#endif
#ifndef static_assert
#define static_assert(expr) _Static_assert(expr, #expr)
#endif
#ifndef typescompatible
#define typescompatible(a, b) __builtin_types_compatible_p(a, b)
#endif
#ifndef __STRICT_ANSI__
#define testonly noinline _Section(".test")
#define textstartup _Section(".text.startup") noinstrument
@ -873,10 +843,6 @@ typedef uint64_t uintmax_t;
#define offsetof(type, member) __builtin_offsetof(type, member)
#endif
#ifndef alignas
#define alignas(x) _Alignas(x)
#endif
#ifndef _Section
#ifndef __STRICT_ANSI__
#define _Section(s) __attribute__((__section__(s)))
@ -1029,15 +995,15 @@ typedef uint64_t uintmax_t;
* Pulls another module, by symbol, into linkage.
* @note nop is discarded by ape/ape.lds
*/
#define YOINK(SYMBOL) \
do { \
_Static_assert(!typescompatible(typeof(SYMBOL), char[]), \
"Please YOINK(symbol), not YOINK(\"symbol\")"); \
asm(".pushsection .yoink\n\t" \
"nop\t%a0\n\t" \
".popsection" \
: /* no outputs */ \
: "X"(SYMBOL)); \
#define YOINK(SYMBOL) \
do { \
_Static_assert(!__builtin_types_compatible_p(typeof(SYMBOL), char[]), \
"Please YOINK(symbol), not YOINK(\"symbol\")"); \
asm(".pushsection .yoink\n\t" \
"nop\t%a0\n\t" \
".popsection" \
: /* no outputs */ \
: "X"(SYMBOL)); \
} while (0)
/**

5
libc/intrin/mpsadbw.h

@ -10,11 +10,12 @@ void mpsadbw(uint16_t[8], const uint8_t[16], const uint8_t[16], uint8_t);
__intrin_xmm_t __mpsadbws(__intrin_xmm_t, __intrin_xmm_t);
#define mpsadbw(C, B, A, I) \
do { \
if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE4_1))) { \
if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE4_1), \
1)) { \
__intrin_xmm_t *Xmm0 = (void *)(C); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
if (isconstant(I)) { \
if (__builtin_constant_p(I)) { \
if (!X86_NEED(AVX)) { \
asm("mpsadbw\t%2,%1,%0" \
: "=x"(*Xmm0) \

59
libc/intrin/palignr.h

@ -9,35 +9,36 @@ void palignr(void *, const void *, const void *, unsigned long);
#if !defined(__STRICT_ANSI__) && !defined(__chibicc__)
__intrin_xmm_t __palignrs(__intrin_xmm_t, __intrin_xmm_t);
#define palignr(C, B, A, I) \
do { \
if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3))) { \
__intrin_xmm_t *Xmm0 = (void *)(C); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
if (isconstant(I)) { \
if (!X86_NEED(AVX)) { \
asm("palignr\t%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpalignr\t%3,%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__palignrs) *Fn; \
if (likely(Vimm < 32)) { \
Fn = (typeof(__palignrs) *)((uintptr_t)&__palignrs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1, *Xmm2); \
} else { \
memset(Xmm0, 0, 16); \
} \
} \
} else { \
palignr(C, B, A, I); \
} \
#define palignr(C, B, A, I) \
do { \
if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3), \
1)) { \
__intrin_xmm_t *Xmm0 = (void *)(C); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
if (__builtin_constant_p(I)) { \
if (!X86_NEED(AVX)) { \
asm("palignr\t%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpalignr\t%3,%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__palignrs) *Fn; \
if (__builtin_expect(Vimm < 32, 1)) { \
Fn = (typeof(__palignrs) *)((uintptr_t)&__palignrs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1, *Xmm2); \
} else { \
memset(Xmm0, 0, 16); \
} \
} \
} else { \
palignr(C, B, A, I); \
} \
} while (0)
#endif

43
libc/intrin/pslldq.h

@ -8,27 +8,28 @@ void pslldq(uint8_t[16], const uint8_t[16], unsigned long);
#ifndef __STRICT_ANSI__
__intrin_xmm_t __pslldqs(__intrin_xmm_t);
#define pslldq(B, A, I) \
do { \
if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE2))) { \
__intrin_xmm_t *Xmm0 = (void *)(B); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(A); \
if (isconstant(I)) { \
if (!X86_NEED(AVX)) { \
asm("pslldq\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpslldq\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__pslldqs) *Fn; \
if (Vimm > 16) Vimm = 16; \
Fn = (typeof(__pslldqs) *)((uintptr_t)&__pslldqs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1); \
} \
} else { \
pslldq(B, A, I); \
} \
#define pslldq(B, A, I) \
do { \
if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE2), \
1)) { \
__intrin_xmm_t *Xmm0 = (void *)(B); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(A); \
if (__builtin_constant_p(I)) { \
if (!X86_NEED(AVX)) { \
asm("pslldq\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpslldq\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__pslldqs) *Fn; \
if (Vimm > 16) Vimm = 16; \
Fn = (typeof(__pslldqs) *)((uintptr_t)&__pslldqs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1); \
} \
} else { \
pslldq(B, A, I); \
} \
} while (0)
#endif

43
libc/intrin/psrldq.h

@ -8,27 +8,28 @@ void psrldq(uint8_t[16], const uint8_t[16], unsigned long);
#ifndef __STRICT_ANSI__
__intrin_xmm_t __psrldqs(__intrin_xmm_t);
#define psrldq(B, A, I) \
do { \
if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE2))) { \
__intrin_xmm_t *Xmm0 = (void *)(B); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(A); \
if (isconstant(I)) { \
if (!X86_NEED(AVX)) { \
asm("psrldq\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpsrldq\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__psrldqs) *Fn; \
if (Vimm > 16) Vimm = 16; \
Fn = (typeof(__psrldqs) *)((uintptr_t)&__psrldqs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1); \
} \
} else { \
psrldq(B, A, I); \
} \
#define psrldq(B, A, I) \
do { \
if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE2), \
1)) { \
__intrin_xmm_t *Xmm0 = (void *)(B); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(A); \
if (__builtin_constant_p(I)) { \
if (!X86_NEED(AVX)) { \
asm("psrldq\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpsrldq\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__psrldqs) *Fn; \
if (Vimm > 16) Vimm = 16; \
Fn = (typeof(__psrldqs) *)((uintptr_t)&__psrldqs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1); \
} \
} else { \
psrldq(B, A, I); \
} \
} while (0)
#endif

5
libc/log/log.h

@ -56,8 +56,9 @@ bool isrunningundermake(void);
extern unsigned g_loglevel; /* log level for runtime check */
#define LOGGABLE(LEVEL) \
((!isconstant(LEVEL) || (LEVEL) <= LOGGABLELEVEL) && (LEVEL) <= g_loglevel)
#define LOGGABLE(LEVEL) \
((!__builtin_constant_p(LEVEL) || (LEVEL) <= LOGGABLELEVEL) && \
(LEVEL) <= g_loglevel)
#define LOGF(FMT, ...) \
do { \

21
libc/macros-cpp.internal.inc

@ -1,5 +1,24 @@
/* clang-format off */
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-
vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "ape/relocations.h"
/* clang-format off */
#if __MNO_VZEROUPPER__ + 0
#define vzeroupper

5
libc/macros.h

@ -13,6 +13,9 @@
#define TRUE 1
#define FALSE 0
#define alignas(x) _Alignas(x)
#define static_assert(x) _Static_assert(x, #x)
#define ROUNDUP(X, K) (((X) + (K)-1) & -(K))
#define ROUNDDOWN(X, K) ((X) & -(K))
#define ABS(X) ((X) >= 0 ? (X) : -(X))
@ -20,7 +23,7 @@
#define MAX(X, Y) ((Y) < (X) ? (X) : (Y))
#define PASTE(A, B) __PASTE(A, B)
#define STRINGIFY(A) __STRINGIFY(A)
#define EQUIVALENT(X, Y) (isconstant((X) == (Y)) && ((X) == (Y)))
#define EQUIVALENT(X, Y) (__builtin_constant_p((X) == (Y)) && ((X) == (Y)))
#define TYPE_BIT(type) (sizeof(type) * CHAR_BIT)
#define TYPE_SIGNED(type) (((type)-1) < 0)
#define TYPE_INTEGRAL(type) (((type)0.5) != 0.5)

20
libc/macros.internal.inc

@ -1,3 +1,23 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-
vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
/ Shorthand notation for widely-acknowledged sections.
.macro .rodata
.section .rodata,"a",@progbits

2
libc/nexgen32e/crc32.h

@ -5,7 +5,7 @@ COSMOPOLITAN_C_START_
void crc32init(uint32_t[hasatleast 256], uint32_t);
uint32_t crc32_z(uint32_t, const void *, size_t);
extern uint32_t (*const crc32c)(uint32_t, const void *, size_t) paramsnonnull();
extern uint32_t (*const crc32c)(uint32_t, const void *, size_t);
uint32_t crc32c$pure(uint32_t, const void *, size_t) strlenesque hidden;
uint32_t crc32c$sse42(uint32_t, const void *, size_t) strlenesque hidden;
uint32_t crc32$pclmul(uint32_t, const void *, size_t) hidden;

2
libc/nexgen32e/crc32c-sse42.c

@ -23,7 +23,7 @@
* Hashes data with hardware acceleration at 10GBps.
* @note needs Nehalem+ c. 2008 or Bulldozer+ c. 2011
*/
uint32_t crc32c$sse42(uint32_t init, const void *data, size_t n) {
optimizespeed uint32_t crc32c$sse42(uint32_t init, const void *data, size_t n) {
const unsigned char *p = (const unsigned char *)data;
const unsigned char *pe = (const unsigned char *)data + n;
uint32_t h = init ^ 0xffffffff;

467
libc/nexgen32e/memmove.inc

@ -1,467 +0,0 @@
/*
Copyright (c) 2014, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
.ident "\n
memmove (Licensed BSD-3)\n
Copyright 2014 Intel Corporation"
.include "libc/disclaimer.inc"
#ifndef L
# define L(label) .L##label
#endif
#ifndef SHARED_CACHE_SIZE_HALF
#define SHARED_CACHE_SIZE_HALF (4 * 1024 * 1024)
#endif
push %rbx
push %rdx
push %r8
push %r9
/* Check whether we should copy backward or forward. */
cmp %rsi, %rdi
je L(mm_return)
jg L(mm_len_0_or_more_backward)
/* Now do checks for lengths. We do [0..16], [0..32], [0..64], [0..128]
separately. */
cmp $16, %rdx
jbe L(mm_len_0_16_bytes_forward)
cmp $32, %rdx
ja L(mm_len_32_or_more_forward)
/* Copy [0..32] and return. */
movdqu (%rsi), %xmm0
movdqu -16(%rsi, %rdx), %xmm1
movdqu %xmm0, (%rdi)
movdqu %xmm1, -16(%rdi, %rdx)
jmp L(mm_return)
L(mm_len_32_or_more_forward):
cmp $64, %rdx
ja L(mm_len_64_or_more_forward)
/* Copy [0..64] and return. */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm1
movdqu -16(%rsi, %rdx), %xmm2
movdqu -32(%rsi, %rdx), %xmm3
movdqu %xmm0, (%rdi)
movdqu %xmm1, 16(%rdi)
movdqu %xmm2, -16(%rdi, %rdx)
movdqu %xmm3, -32(%rdi, %rdx)
jmp L(mm_return)
L(mm_len_64_or_more_forward):
cmp $128, %rdx
ja L(mm_len_128_or_more_forward)
/* Copy [0..128] and return. */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm1
movdqu 32(%rsi), %xmm2
movdqu 48(%rsi), %xmm3
movdqu -64(%rsi, %rdx), %xmm4
movdqu -48(%rsi, %rdx), %xmm5
movdqu -32(%rsi, %rdx), %xmm6
movdqu -16(%rsi, %rdx), %xmm7
movdqu %xmm0, (%rdi)
movdqu %xmm1, 16(%rdi)
movdqu %xmm2, 32(%rdi)
movdqu %xmm3, 48(%rdi)
movdqu %xmm4, -64(%rdi, %rdx)
movdqu %xmm5, -48(%rdi, %rdx)
movdqu %xmm6, -32(%rdi, %rdx)
movdqu %xmm7, -16(%rdi, %rdx)
jmp L(mm_return)
L(mm_len_128_or_more_forward):
/* Aligning the address of destination. */
/* save first unaligned 64 bytes */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm1
movdqu 32(%rsi), %xmm2
movdqu 48(%rsi), %xmm3
lea 64(%rdi), %r8
and $-64, %r8 /* r8 now aligned to next 64 byte boundary */
sub %rdi, %rsi /* rsi = src - dst = diff */
movdqu (%r8, %rsi), %xmm4
movdqu 16(%r8, %rsi), %xmm5
movdqu 32(%r8, %rsi), %xmm6
movdqu 48(%r8, %rsi), %xmm7
movdqu %xmm0, (%rdi)
movdqu %xmm1, 16(%rdi)
movdqu %xmm2, 32(%rdi)
movdqu %xmm3, 48(%rdi)
movdqa %xmm4, (%r8)
movaps %xmm5, 16(%r8)
movaps %xmm6, 32(%r8)
movaps %xmm7, 48(%r8)
add $64, %r8
lea (%rdi, %rdx), %rbx
and $-64, %rbx
cmp %r8, %rbx
jbe L(mm_copy_remaining_forward)
cmp $SHARED_CACHE_SIZE_HALF, %rdx
jae L(mm_large_page_loop_forward)
.p2align 4
L(mm_main_loop_forward):
prefetcht0 128(%r8, %rsi)
movdqu (%r8, %rsi), %xmm0
movdqu 16(%r8, %rsi), %xmm1
movdqu 32(%r8, %rsi), %xmm2
movdqu 48(%r8, %rsi), %xmm3
movdqa %xmm0, (%r8)
movaps %xmm1, 16(%r8)
movaps %xmm2, 32(%r8)
movaps %xmm3, 48(%r8)
lea 64(%r8), %r8
cmp %r8, %rbx
ja L(mm_main_loop_forward)
L(mm_copy_remaining_forward):
add %rdi, %rdx
sub %r8, %rdx
/* We copied all up till %rdi position in the dst.
In %rdx now is how many bytes are left to copy.
Now we need to advance %r8. */
lea (%r8, %rsi), %r9
L(mm_remaining_0_64_bytes_forward):
cmp $32, %rdx
ja L(mm_remaining_33_64_bytes_forward)
cmp $16, %rdx
ja L(mm_remaining_17_32_bytes_forward)
test %rdx, %rdx
.p2align 4,,2
je L(mm_return)
cmpb $8, %dl
ja L(mm_remaining_9_16_bytes_forward)
cmpb $4, %dl
.p2align 4,,5
ja L(mm_remaining_5_8_bytes_forward)
cmpb $2, %dl
.p2align 4,,1
ja L(mm_remaining_3_4_bytes_forward)
movzbl -1(%r9,%rdx), %esi
movzbl (%r9), %ebx
movb %sil, -1(%r8,%rdx)
movb %bl, (%r8)
jmp L(mm_return)
L(mm_remaining_33_64_bytes_forward):
movdqu (%r9), %xmm0
movdqu 16(%r9), %xmm1
movdqu -32(%r9, %rdx), %xmm2
movdqu -16(%r9, %rdx), %xmm3
movdqu %xmm0, (%r8)
movdqu %xmm1, 16(%r8)
movdqu %xmm2, -32(%r8, %rdx)
movdqu %xmm3, -16(%r8, %rdx)
jmp L(mm_return)
L(mm_remaining_17_32_bytes_forward):
movdqu (%r9), %xmm0
movdqu -16(%r9, %rdx), %xmm1
movdqu %xmm0, (%r8)
movdqu %xmm1, -16(%r8, %rdx)
jmp L(mm_return)
L(mm_remaining_5_8_bytes_forward):
movl (%r9), %esi
movl -4(%r9,%rdx), %ebx
movl %esi, (%r8)
movl %ebx, -4(%r8,%rdx)
jmp L(mm_return)
L(mm_remaining_9_16_bytes_forward):
mov (%r9), %rsi
mov -8(%r9, %rdx), %rbx
mov %rsi, (%r8)
mov %rbx, -8(%r8, %rdx)
jmp L(mm_return)
L(mm_remaining_3_4_bytes_forward):
movzwl -2(%r9,%rdx), %esi
movzwl (%r9), %ebx
movw %si, -2(%r8,%rdx)
movw %bx, (%r8)
jmp L(mm_return)
L(mm_len_0_16_bytes_forward):
testb $24, %dl
jne L(mm_len_9_16_bytes_forward)
testb $4, %dl
.p2align 4,,5
jne L(mm_len_5_8_bytes_forward)
test %rdx, %rdx
.p2align 4,,2
je L(mm_return)
testb $2, %dl
.p2align 4,,1
jne L(mm_len_2_4_bytes_forward)
movzbl -1(%rsi,%rdx), %ebx
movzbl (%rsi), %esi
movb %bl, -1(%rdi,%rdx)
movb %sil, (%rdi)
jmp L(mm_return)
L(mm_len_2_4_bytes_forward):
movzwl -2(%rsi,%rdx), %ebx
movzwl (%rsi), %esi
movw %bx, -2(%rdi,%rdx)
movw %si, (%rdi)
jmp L(mm_return)
L(mm_len_5_8_bytes_forward):
movl (%rsi), %ebx
movl -4(%rsi,%rdx), %esi
movl %ebx, (%rdi)
movl %esi, -4(%rdi,%rdx)
jmp L(mm_return)
L(mm_len_9_16_bytes_forward):
mov (%rsi), %rbx
mov -8(%rsi, %rdx), %rsi
mov %rbx, (%rdi)
mov %rsi, -8(%rdi, %rdx)
jmp L(mm_return)
L(mm_recalc_len):
/* Compute in %rdx how many bytes are left to copy after
the main loop stops. */
mov %rbx, %rdx
sub %rdi, %rdx
/* The code for copying backwards. */
L(mm_len_0_or_more_backward):
/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
separately. */
cmp $16, %rdx
jbe L(mm_len_0_16_bytes_backward)
cmp $32, %rdx
ja L(mm_len_32_or_more_backward)
/* Copy [0..32] and return. */
movdqu (%rsi), %xmm0
movdqu -16(%rsi, %rdx), %xmm1
movdqu %xmm0, (%rdi)
movdqu %xmm1, -16(%rdi, %rdx)
jmp L(mm_return)
L(mm_len_32_or_more_backward):
cmp $64, %rdx
ja L(mm_len_64_or_more_backward)
/* Copy [0..64] and return. */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm1
movdqu -16(%rsi, %rdx), %xmm2
movdqu -32(%rsi, %rdx), %xmm3
movdqu %xmm0, (%rdi)
movdqu %xmm1, 16(%rdi)
movdqu %xmm2, -16(%rdi, %rdx)
movdqu %xmm3, -32(%rdi, %rdx)
jmp L(mm_return)
L(mm_len_64_or_more_backward):
cmp $128, %rdx
ja L(mm_len_128_or_more_backward)
/* Copy [0..128] and return. */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm1
movdqu 32(%rsi), %xmm2
movdqu 48(%rsi), %xmm3
movdqu -64(%rsi, %rdx), %xmm4
movdqu -48(%rsi, %rdx), %xmm5
movdqu -32(%rsi, %rdx), %xmm6
movdqu -16(%rsi, %rdx), %xmm7
movdqu %xmm0, (%rdi)
movdqu %xmm1, 16(%rdi)
movdqu %xmm2, 32(%rdi)
movdqu %xmm3, 48(%rdi)
movdqu %xmm4, -64(%rdi, %rdx)
movdqu %xmm5, -48(%rdi, %rdx)
movdqu %xmm6, -32(%rdi, %rdx)
movdqu %xmm7, -16(%rdi, %rdx)
jmp L(mm_return)
L(mm_len_128_or_more_backward):
/* Aligning the address of destination. We need to save
	the last 64 bytes of the source in order not to overwrite them. */
movdqu -16(%rsi, %rdx), %xmm0
movdqu -32(%rsi, %rdx), %xmm1
movdqu -48(%rsi, %rdx), %xmm2
movdqu -64(%rsi, %rdx), %xmm3
lea (%rdi, %rdx), %r9
and $-64, %r9 /* r9 = aligned dst */
mov %rsi, %r8
sub %rdi, %r8 /* r8 = src - dst, diff */
movdqu -16(%r9, %r8), %xmm4
movdqu -32(%r9, %r8), %xmm5
movdqu -48(%r9, %r8), %xmm6
movdqu -64(%r9, %r8), %xmm7
movdqu %xmm0, -16(%rdi, %rdx)
movdqu %xmm1, -32(%rdi, %rdx)
movdqu %xmm2, -48(%rdi, %rdx)
movdqu %xmm3, -64(%rdi, %rdx)
movdqa %xmm4, -16(%r9)
movaps %xmm5, -32(%r9)
movaps %xmm6, -48(%r9)
movaps %xmm7, -64(%r9)
lea -64(%r9), %r9
lea 64(%rdi), %rbx
and $-64, %rbx
cmp %r9, %rbx
jae L(mm_recalc_len)
cmp $SHARED_CACHE_SIZE_HALF, %rdx
jae L(mm_large_page_loop_backward)
.p2align 4
L(mm_main_loop_backward):
prefetcht0 -128(%r9, %r8)
movdqu -64(%r9, %r8), %xmm0
movdqu -48(%r9, %r8), %xmm1
movdqu -32(%r9, %r8), %xmm2
movdqu -16(%r9, %r8), %xmm3
movdqa %xmm0, -64(%r9)
movaps %xmm1, -48(%r9)
movaps %xmm2, -32(%r9)
movaps %xmm3, -16(%r9)
lea -64(%r9), %r9
cmp %r9, %rbx
jb L(mm_main_loop_backward)
jmp L(mm_recalc_len)
/* Copy [0..16] and return. */
L(mm_len_0_16_bytes_backward):
testb $24, %dl
jnz L(mm_len_9_16_bytes_backward)
testb $4, %dl
.p2align 4,,5
jnz L(mm_len_5_8_bytes_backward)
test %rdx, %rdx
.p2align 4,,2
je L(mm_return)
testb $2, %dl
.p2align 4,,1
jne L(mm_len_3_4_bytes_backward)
movzbl -1(%rsi,%rdx), %ebx
movzbl (%rsi), %ecx
movb %bl, -1(%rdi,%rdx)
movb %cl, (%rdi)
jmp L(mm_return)
L(mm_len_3_4_bytes_backward):
movzwl -2(%rsi,%rdx), %ebx
movzwl (%rsi), %ecx
movw %bx, -2(%rdi,%rdx)
movw %cx, (%rdi)
jmp L(mm_return)
L(mm_len_9_16_bytes_backward):
movl -4(%rsi,%rdx), %ebx
movl -8(%rsi,%rdx), %ecx
movl %ebx, -4(%rdi,%rdx)
movl %ecx, -8(%rdi,%rdx)
sub $8, %rdx
jmp L(mm_len_0_16_bytes_backward)
L(mm_len_5_8_bytes_backward):
movl (%rsi), %ebx
movl -4(%rsi,%rdx), %ecx
movl %ebx, (%rdi)
movl %ecx, -4(%rdi,%rdx)
L(mm_return):
pop %r9
pop %r8
pop %rdx
pop %rbx
pop %rbp
ret
/* Big length copy forward part. */
.p2align 4
L(mm_large_page_loop_forward):
movdqu (%r8, %rsi), %xmm0
movdqu 16(%r8, %rsi), %xmm1
movdqu 32(%r8, %rsi), %xmm2
movdqu 48(%r8, %rsi), %xmm3
movntdq %xmm0, (%r8)
movntdq %xmm1, 16(%r8)
movntdq %xmm2, 32(%r8)
movntdq %xmm3, 48(%r8)
lea 64(%r8), %r8
cmp %r8, %rbx
ja L(mm_large_page_loop_forward)
sfence
jmp L(mm_copy_remaining_forward)
/* Big length copy backward part. */
.p2align 4
L(mm_large_page_loop_backward):
movdqu -64(%r9, %r8), %xmm0
movdqu -48(%r9, %r8), %xmm1
movdqu -32(%r9, %r8), %xmm2
movdqu -16(%r9, %r8), %xmm3
movntdq %xmm0, -64(%r9)
movntdq %xmm1, -48(%r9)
movntdq %xmm2, -32(%r9)
movntdq %xmm3, -16(%r9)
lea -64(%r9), %r9
cmp %r9, %rbx
jb L(mm_large_page_loop_backward)
sfence
jmp L(mm_recalc_len)

52
libc/nexgen32e/strlen.S

@ -0,0 +1,52 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for