/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ │vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ │ Copyright 2020 Justine Alexandra Roberts Tunney │ │ │ │ This program is free software; you can redistribute it and/or modify │ │ it under the terms of the GNU General Public License as published by │ │ the Free Software Foundation; version 2 of the License. │ │ │ │ This program is distributed in the hope that it will be useful, but │ │ WITHOUT ANY WARRANTY; without even the implied warranty of │ │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ │ General Public License for more details. │ │ │ │ You should have received a copy of the GNU General Public License │ │ along with this program; if not, write to the Free Software │ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.h" / Performs 128-bit div+mod by 10 without using div or mod. / / If we didn't have this one-off function, our palandprintf() / implementation would cause nearly everything to need a soft / math library. It also somehow goes faster than 64-bit IDIV. 
/
/	@param	rdi:rsi is the number (rdi is low 64 bits, rsi is high)
/	@param	rdx points to where 32-bit remainder goes (may be NULL)
/	@return	rax:rdx is result of division (rax is low, rdx is high)
/	@see	“Division by Invariant Integers using Multiplication”
/	@see	llog10() and div10int64() is a tiny bit faster
/
/	When the high word is zero, the quotient is taken from a
/	64-bit multiply by 0xcccccccccccccccd and a right shift by 3.
/	Otherwise a shift-and-subtract long division runs for
/	125 minus clz(rsi) iterations. Register roles in that loop:
/
/	  rax, rsi   running remainder (rax is the high word)
/	  rdi, r11   rdi feeds number bits into the remainder from
/	             its top while r11 collects quotient bits at
/	             its bottom (rdi is the high word of the pair)
/	  rcx        quotient bit produced by the previous iteration
/	  r9         the constant 9, i.e. the divisor minus one
/	  r10d       negated iteration count, incremented up to zero
/	  rbx        scratch mask (callee-saved, so pushed and popped)
/	  r8         copy of the remainder pointer
div10:	.leafprologue
	.profilable
	push	%rbx
	mov	%rdx,%r8			# keep remainder pointer, rdx is reused below
	test	%rsi,%rsi
	je	1f				# high word is zero: 64-bit fast path
	bsr	%rsi,%r10
	xor	$63,%r10d			# r10d = number of leading zeros in rsi
	mov	$125,%r9d
	sub	%r10d,%r9d			# r9d = 125 - clz = iteration count
	cmp	$64,%r9d
	jne	6f
	xor	%eax,%eax			# count is 64: rsi and rdi are already placed
	xor	%r11d,%r11d
	jmp	9f
1:	test	%r8,%r8
	je	3f				# remainder not wanted
	movabs	$0xcccccccccccccccd,%rcx	# 2^67 / 10 rounded up
	mov	%rdi,%rax
	mul	%rcx				# rdx = high 64 bits of rdi * rcx
	shr	$3,%rdx				# rdx = rdi / 10
	add	%edx,%edx
	lea	(%rdx,%rdx,4),%eax		# eax = low 32 bits of quotient * 10
	mov	%edi,%ecx
	sub	%eax,%ecx			# ecx = number - quotient * 10 = remainder
	mov	%ecx,(%r8)			# 32-bit store
3:	movabs	$0xcccccccccccccccd,%rcx	# NOTE(review): the quotient is recomputed
	mov	%rdi,%rax			# here even when the block above just ran
	mul	%rcx
	mov	%rdx,%rax
	shr	$3,%rax				# rax = low word of quotient
	xor	%edi,%edi			# high word of quotient is zero
	jmp	14f
6:	mov	%r9d,%ecx
	neg	%cl				# cl = -count; shifts only use its low 6 bits
	cmp	$62,%r10d
	jb	8f				# clz below 62, so count is above 64
	mov	%rdi,%rdx			# count is 62 or 63 on this path
	shl	%cl,%rdx			# rdx = rdi shifted left by 64 - count
	mov	%rsi,%rax
	mov	%r9d,%ecx
	shr	%cl,%rax			# rax = rsi shifted right by count
	shrd	%cl,%rsi,%rdi			# rdi = low word of (rsi,rdi) shifted right by count
	xor	%r11d,%r11d
	mov	%rdi,%rsi
	mov	%rdx,%rdi
	jmp	9f
8:	mov	%rdi,%r11
	shl	%cl,%r11			# r11 = rdi shifted left by 128 - count
	mov	%rsi,%rax
	shl	%cl,%rax
	mov	%r9d,%ecx
	shr	%cl,%rdi			# effective shift amount is count - 64
	or	%rax,%rdi			# rdi = next bits of the number to consume
	shr	%cl,%rsi			# rsi = top bits of the number, seeding remainder
	xor	%eax,%eax
9:	add	$-125,%r10d			# r10d = clz - 125 = -count
	xor	%ecx,%ecx			# no quotient bit pending yet
	mov	$9,%r9d
10:	shld	$1,%rsi,%rax			# shift rax, rsi, rdi, r11 left by one bit as a chain
	shld	$1,%rdi,%rsi
	shld	$1,%r11,%rdi
	mov	%r11,%rdx
	add	%r11,%rdx			# rdx = r11 * 2
	mov	%rcx,%r11
	or	%rdx,%r11			# r11 = r11 * 2 + previous quotient bit
	cmp	%rsi,%r9			# carry is set when rsi is above 9
	mov	$0,%ebx				# mov leaves the carry flag intact for sbb
	sbb	%rax,%rbx			# rbx = -(rax + carry)
	sar	$63,%rbx			# rbx = all ones when the remainder exceeds 9
	mov	%ebx,%ecx
	and	$1,%ecx				# ecx = quotient bit of this step
	and	$10,%ebx			# rbx = 10 or 0
	sub	%rbx,%rsi			# subtract 10 from the remainder if it fit
	sbb	$0,%rax
	inc	%r10d
	jne	10b				# repeat until the counter reaches zero
	test	%r8,%r8
	je	13f
	mov	%esi,(%r8)			# 32-bit store of remainder when pointer given
13:	lea	(%rcx,%r11,2),%rax		# rax = r11 * 2 + final quotient bit
	shld	$1,%rdx,%rdi			# top bit of rdx equals top bit of r11 here
14:	mov	%rdi,%rdx			# rdx = high word of quotient
	pop	%rbx
	.leafepilogue
	.endfn	div10,globl,hidden
	.source	__FILE__