/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8     -*-│
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8                                     :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ This program is free software; you can redistribute it and/or modify         │
│ it under the terms of the GNU General Public License as published by         │
│ the Free Software Foundation; version 2 of the License.                      │
│                                                                              │
│ This program is distributed in the hope that it will be useful, but          │
│ WITHOUT ANY WARRANTY; without even the implied warranty of                   │
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU             │
│ General Public License for more details.                                     │
│                                                                              │
│ You should have received a copy of the GNU General Public License            │
│ along with this program; if not, write to the Free Software                  │
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA                │
│ 02110-1301 USA                                                               │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/nexgen32e/macros.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/macros.h"

/	Mutates string to uppercase roman characters.
/
/	Only bytes in the unsigned range a..z are changed; each of
/	them has A-a (that is, -32) added to it. This entry point
/	just loads the range into CL/CH and the delta into DL, then
/	jumps (rather than calls) to strcaseconv, which does the
/	in-place rewrite and produces the return value.
/
/	@param	RDI points to non-const NUL-terminated string
/	@return	RAX will be original RDI
/	@note	10x faster than C
strtoupper:
	mov	$'A-'a,%edx			# DL = A-a: adding this uppers
	mov	$'a|'z<<8,%ecx			# CL = a, CH = z: uint8_t range a..z
	jmp	strcaseconv
	.endfn	strtoupper,globl

/	Mutates string to lowercase roman characters.
/
/	Only bytes in the unsigned range A..Z are changed; each of
/	them has a-A (that is, +32) added to it. There is no jump
/	at the end of this entry point: execution slides straight
/	into strcaseconv, so strcaseconv has to remain the very next
/	code emitted after this function.
/
/	@param	RDI points to non-const NUL-terminated string
/	@return	RAX will be original RDI
/	@note	10x faster than C
strtolower:
	mov	$'a-'A,%edx			# DL = a-A: adding this lowers
	mov	$'A|'Z<<8,%ecx			# CL = A, CH = Z: uint8_t range A..Z
/	𝑠𝑙𝑖𝑑𝑒
	.endfn	strtolower,globl

/	Support code for strtolower() and strtoupper().
/
/	The string is rewritten in place. Bytes are handled one at a
/	time for as long as the cursor is not 16-byte aligned. Once
/	it is aligned, and SSE4.2 is either required at build time or
/	reported by the runtime feature check, the remainder is done
/	16 bytes per iteration with PCMPISTRM. Without SSE4.2 the
/	byte loop simply keeps going until it reads the NUL.
/
/	@param	RDI points to non-const NUL-terminated string
/	@param	CL defines start of character range to mutate
/	@param	CH defines end of character range to mutate
/	@param	DL is added to each string byte ∈ [CL,CH]
/	@return	RAX will be original RDI
/	@note	range bounds are inclusive and compared as unsigned
/	@note	the vector path copies all of ECX into XMM1, so bits
/		16..31 of ECX need to be zero to end the range list;
/		both entry points in this file load ECX that way
/	@note	overwrites at least RSI, RCX, AL, flags, XMM0..XMM3
strcaseconv:
	.leafprologue
	.profilable
	mov	%rdi,%rsi			# RSI = cursor; RDI kept for return
/	Top of the scalar loop: re-evaluated before every single byte,
/	so the vector path is entered as soon as RSI becomes aligned.
0:	testb	$15,%sil			# is it aligned?
#if X86_NEED(SSE4_2)
	jz	.Lsse4
#else
	jnz	1f
	testb	X86_HAVE(SSE4_2)+kCpuids(%rip)
	jnz	.Lsse4				# is it nehalem?
#endif
1:	lodsb					# AL = *RSI++ (assumes DF is clear)
	test	%al,%al				# is it NUL?
	jz	3f
	cmp	%cl,%al				# is it in range?
	jb	0b				# below CL: leave byte alone
	cmp	%ch,%al
	ja	0b				# above CH: leave byte alone
	add	%dl,-1(%rsi)			# patch the byte LODSB just read
	jmp	0b
/	Vector path. RSI is 16-byte aligned whenever control gets here,
/	which is what makes the MOVDQA load and store below legal.
.Lsse4:	movd	%ecx,%xmm1			# XMM1 = [CL,CH,0,0,...]
	movd	%edx,%xmm2			# XMM2 low byte = DL
	pbroadcastb %xmm2			# XMM2 = [DL,DL,...] (one-operand macro, presumably from the included headers)
	xor	%ecx,%ecx			# RCX = byte offset from RSI; range now lives in XMM1
2:	movdqa	(%rsi,%rcx),%xmm3		# XMM3 = next 16 string bytes
/	              ┌─0:index of the LEAST significant, set, bit is used
/	              │   regardless of corresponding input element validity
/	              │   intres2 is returned in least significant bits of xmm0
/	              ├─1:index of the MOST significant, set, bit is used
/	              │   regardless of corresponding input element validity
/	              │   each bit of intres2 is expanded to byte/word
/	              │┌─0:negation of intres1 is for all 16 (8) bits
/	              │├─1:negation of intres1 is masked by reg/mem validity
/	              ││┌─intres1 is negated (1’s complement)
/	              │││┌─mode{equalany,ranges,equaleach,equalordered}
/	              ││││ ┌─issigned
/	              ││││ │┌─is16bit
/	             u│││├┐││
	pcmpistrm $0b01000100,%xmm3,%xmm1	# →XMM0 8-bit byte mask
/	The immediate above selects: unsigned bytes, ranges mode, no
/	negation, result expanded to one mask byte per string byte.
	pand	%xmm2,%xmm0			# DL where in range, else 0; bytes after NUL get 0
	paddb	%xmm0,%xmm3			# apply delta to the in-range bytes only
	movdqa	%xmm3,(%rsi,%rcx)		# store all 16 bytes back (unmasked ones unchanged)
	lea	16(%rcx),%rcx			# advance offset; LEA leaves flags untouched
/	The flags tested next are still the ones PCMPISTRM produced,
/	since PAND, PADDB, MOVDQA and LEA do not write flags. Per the
/	Intel SDM, ZF is set when the string operand contained a NUL.
	jnz	2b				# no NUL in this chunk yet: keep going
3:	mov	%rdi,%rax			# return the original pointer
	.leafepilogue
	.endfn	strcaseconv
	.source	__FILE__