2020-06-16 13:38:43 +00:00
|
|
|
|
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
2020-06-15 14:18:57 +00:00
|
|
|
|
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
|
|
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
|
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
|
|
|
│ │
|
|
|
|
|
│ This program is free software; you can redistribute it and/or modify │
|
|
|
|
|
│ it under the terms of the GNU General Public License as published by │
|
|
|
|
|
│ the Free Software Foundation; version 2 of the License. │
|
|
|
|
|
│ │
|
|
|
|
|
│ This program is distributed in the hope that it will be useful, but │
|
|
|
|
|
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
|
|
|
|
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
|
|
|
|
│ General Public License for more details. │
|
|
|
|
|
│ │
|
|
|
|
|
│ You should have received a copy of the GNU General Public License │
|
|
|
|
|
│ along with this program; if not, write to the Free Software │
|
|
|
|
|
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
|
|
|
|
│ 02110-1301 USA │
|
|
|
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
|
|
|
#include "libc/nexgen32e/macros.h"
|
|
|
|
|
#include "libc/nexgen32e/x86feature.h"
|
|
|
|
|
#include "libc/macros.h"
|
|
|
|
|
|
|
|
|
|
/	Converts a string to uppercase in place (letters a..z only).
/
/	This entry point only loads the range and delta parameters
/	and then tail-jumps into strcaseconv, which walks the string
/	and produces the return value.
/
/	@param	RDI points to non-const NUL-terminated string
/	@return	RAX will be original RDI
/	@note	10x faster than C
strtoupper:
	mov	$'a|'z<<8,%ecx			# CL = first, CH = last byte of a..z
	mov	$'A-'a,%edx			# DL = delta taking a..z onto A..Z
	jmp	strcaseconv
	.endfn	strtoupper,globl
|
2020-06-15 14:18:57 +00:00
|
|
|
|
|
|
|
|
|
/	Converts a string to lowercase in place (letters A..Z only).
/
/	This entry point only loads the range and delta parameters.
/	It has no jump or return of its own: execution continues into
/	strcaseconv, which therefore must stay directly below.
/
/	@param	RDI points to non-const NUL-terminated string
/	@return	RAX will be original RDI
/	@note	10x faster than C
strtolower:
	mov	$'A|'Z<<8,%ecx			# CL = first, CH = last byte of A..Z
	mov	$'a-'A,%edx			# DL = delta taking A..Z onto a..z
/	fall through into strcaseconv
	.endfn	strtolower,globl
|
2020-06-15 14:18:57 +00:00
|
|
|
|
|
|
|
|
|
/	Case conversion engine shared by strtolower() and strtoupper().
/
/	Bytes are handled one at a time for as long as the cursor is
/	not on a 16-byte boundary. Once it is, and SSE4.2 is usable,
/	the remainder of the string is converted sixteen bytes per
/	iteration with PCMPISTRM.
/
/	@param	RDI points to non-const NUL-terminated string
/	@param	CL is first byte value of the range to mutate
/	@param	CH is last byte value of the range to mutate
/	@param	DL is added to each string byte within [CL,CH]
/	@return	RAX will be original RDI
strcaseconv:
	.leafprologue
	.profilable
	mov	%rdi,%rsi			# RSI is the cursor; RDI kept for result
.Lnext:	testb	$15,%sil			# low four address bits clear?
#if X86_NEED(SSE4_2)
	jz	.Lsse4
#else
	jnz	.Lbyte
	testb	X86_HAVE(SSE4_2)+kCpuids(%rip)
	jnz	.Lsse4				# aligned and cpu reports sse4.2
#endif
.Lbyte:	lodsb					# AL = *RSI++
	test	%al,%al
	jz	.Ldone				# hit the NUL terminator
	cmp	%ch,%al
	ja	.Lnext				# above range, leave byte alone
	cmp	%cl,%al
	jb	.Lnext				# below range, leave byte alone
	add	%dl,-1(%rsi)			# patch the byte just read
	jmp	.Lnext
.Lsse4:	movd	%edx,%xmm2
	movd	%ecx,%xmm1			# XMM1 = ECX bytes (CL,CH,...), rest zero
	pbroadcastb %xmm2			# XMM2 = DL copied to every byte
	xor	%ecx,%ecx			# RCX = byte offset from RSI
.Lvec:	movdqa	(%rsi,%rcx),%xmm3		# fetch sixteen aligned bytes
/	PCMPISTRM control byte 0b01000100
/	  bits 1:0 = 00  elements are unsigned bytes
/	  bits 3:2 = 01  ranges comparison
/	  bits 5:4 = 00  result is not negated
/	  bit  6   = 1   result expanded to one mask byte per element
	pcmpistrm $0b01000100,%xmm3,%xmm1	# XMM0 = byte mask of in-range elements
	pand	%xmm2,%xmm0			# delta where matched, zero elsewhere
	paddb	%xmm0,%xmm3
	movdqa	%xmm3,(%rsi,%rcx)
	lea	16(%rcx),%rcx			# advance; LEA leaves flags intact
	jnz	.Lvec				# ZF clear means chunk had no NUL yet
.Ldone:	mov	%rdi,%rax
	.leafepilogue
	.endfn	strcaseconv
|
2020-06-16 02:01:28 +00:00
|
|
|
|
.source __FILE__
|