/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8            -*-│
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8                                  :vi│
╞══════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney                           │
│                                                                           │
│ This program is free software; you can redistribute it and/or modify      │
│ it under the terms of the GNU General Public License as published by      │
│ the Free Software Foundation; version 2 of the License.                   │
│                                                                           │
│ This program is distributed in the hope that it will be useful, but       │
│ WITHOUT ANY WARRANTY; without even the implied warranty of                │
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU          │
│ General Public License for more details.                                  │
│                                                                           │
│ You should have received a copy of the GNU General Public License         │
│ along with this program; if not, write to the Free Software               │
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA             │
│ 02110-1301 USA                                                            │
╚───────────────────────────────────────────────────────────────────────────*/
#include "libc/macros.h"
	.source	__FILE__

//	Pairwise compare/exchange of two int32 arrays.
//	(Roles inferred from the code below: for each i < rdx the smaller
//	of a[i] and b[i] is written back to a[i] and the larger to b[i],
//	eight lanes at a time with AVX2 plus a scalar tail.)
//
//	@param	rdi is int32 array a (receives element-wise minimums)
//	@param	rsi is int32 array b (receives element-wise maximums)
//	@param	rdx is number of elements
	.p2align 4
minmax_vector:
	cmp	$7,%rdx
	jle	.L27
	test	$7,%dl
	je	.L5
	lea	-32(,%rdx,4),%rax
	lea	(%rdi,%rax),%rcx
	add	%rsi,%rax
	vmovdqu	(%rax),%ymm0
	vmovdqu	(%rcx),%ymm1
	and	$-8,%rdx
	vpminsd	%ymm1,%ymm0,%ymm2
	vpmaxsd	%ymm1,%ymm0,%ymm0
	vmovdqu	%ymm2,(%rcx)
	vmovdqu	%ymm0,(%rax)
	.p2align 4,,10
	.p2align 3
.L5:
	vmovdqu	(%rsi),%ymm1
	vmovdqu	(%rdi),%ymm0
	add	$32,%rsi
	vpminsd	%ymm1,%ymm0,%ymm2
	vpmaxsd	%ymm1,%ymm0,%ymm0
	vmovdqu	%ymm2,(%rdi)
	vmovdqu	%ymm0,-32(%rsi)
	add	$32,%rdi
	sub	$8,%rdx
	jne	.L5
	vzeroupper
.L25:
	ret
	.p2align 4,,10
	.p2align 3
.L27:
	test	%rdx,%rdx
	jle	.L25
	mov	(%rdi),%eax
	cmp	(%rsi),%eax
	cmovg	(%rsi),%ecx
	cmovg	%eax,%eax
	mov	%ecx,(%rdi)
	mov	%eax,(%rsi)
	cmp	$1,%rdx
	je	.L25
	mov	4(%rdi),%eax
	cmp	4(%rsi),%eax
	cmovg	4(%rsi),%ecx
	cmovg	%eax,%eax
	mov	%ecx,4(%rdi)
	mov	%eax,4(%rsi)
	cmp	$2,%rdx
	je	.L25
	mov	8(%rdi),%eax
	cmp	8(%rsi),%eax
	cmovg	8(%rsi),%ecx
	cmovg	%eax,%eax
	mov	%ecx,8(%rdi)
	mov	%eax,8(%rsi)
	cmp	$3,%rdx
	je	.L25
	mov	12(%rdi),%eax
	cmp	12(%rsi),%eax
	cmovg	12(%rsi),%ecx
	cmovg	%eax,%eax
	mov	%ecx,12(%rdi)
	mov	%eax,12(%rsi)
	cmp	$4,%rdx
	je	.L25
	mov	16(%rdi),%eax
	cmp	16(%rsi),%eax
	cmovg	16(%rsi),%ecx
	cmovg	%eax,%eax
	mov	%ecx,16(%rdi)
	mov	%eax,16(%rsi)
	cmp	$5,%rdx
	je	.L25
	mov	20(%rdi),%eax
	cmp	20(%rsi),%eax
	cmovg	20(%rsi),%ecx
	cmovg	%eax,%eax
	mov	%ecx,20(%rdi)
	mov	%eax,20(%rsi)
	cmp	$7,%rdx
	jne	.L25
	mov	24(%rdi),%eax
	cmp	24(%rsi),%eax
	cmovg	24(%rsi),%edx
	cmovg	%eax,%eax
	mov	%edx,24(%rdi)
	mov	%eax,24(%rsi)
	ret
	.endfn	minmax_vector,globl

//	Two compare/exchange stages of the sorting network over each
//	128-element block, at distances of 64 then 32 elements.
//	(Interpretation inferred from the memory access pattern below.)
//
//	@param	rdi is int32 array
//	@param	rsi is number of elements
	.p2align 4
int32_twostages_32:
	test	%rsi,%rsi
	jle	.L33
	lea	-128(%rsi),%rax
	dec	%rsi
	and	$-128,%rsi
	mov	%rax,%rdx
	sub	%rsi,%rdx
	jmp	.L30
	.p2align 4,,10
	.p2align 3
.L34:
	add	$-128,%rax
.L30:
	vmovdqu	256(%rdi),%ymm1
	vmovdqu	(%rdi),%ymm0
	vmovdqu	384(%rdi),%ymm4
	vpminsd	%ymm1,%ymm0,%ymm2
	vpmaxsd	%ymm1,%ymm0,%ymm0
	vmovdqu	128(%rdi),%ymm1
	add	$512,%rdi
	vpminsd	%ymm4,%ymm1,%ymm3
	vpmaxsd	%ymm4,%ymm1,%ymm1
	vpminsd	%ymm3,%ymm2,%ymm4
	vpmaxsd	%ymm3,%ymm2,%ymm2
	vpminsd	%ymm1,%ymm0,%ymm3
	vpmaxsd	%ymm1,%ymm0,%ymm0
	vmovdqu	%ymm0,-128(%rdi)
	vmovdqu	-224(%rdi),%ymm1
	vmovdqu	-480(%rdi),%ymm0
	vmovdqu	%ymm4,-512(%rdi)
	vmovdqu	%ymm2,-384(%rdi)
	vmovdqu	-96(%rdi),%ymm4
	vpminsd	%ymm1,%ymm0,%ymm2
	vpmaxsd	%ymm1,%ymm0,%ymm0
	vmovdqu	-352(%rdi),%ymm1
	vmovdqu	%ymm3,-256(%rdi)
	vpminsd	%ymm4,%ymm1,%ymm3
	vpmaxsd	%ymm4,%ymm1,%ymm1
	vpminsd	%ymm3,%ymm2,%ymm4
	vpmaxsd	%ymm3,%ymm2,%ymm2
	vpminsd	%ymm1,%ymm0,%ymm3
	vpmaxsd	%ymm1,%ymm0,%ymm0
	vmovdqu	%ymm0,-96(%rdi)
	vmovdqu	-192(%rdi),%ymm1
	vmovdqu	-448(%rdi),%ymm0
	vmovdqu	%ymm4,-480(%rdi)
	vmovdqu	%ymm2,-352(%rdi)
	vmovdqu
-64(%rdi),%ymm4 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu -320(%rdi),%ymm1 vmovdqu %ymm3,-224(%rdi) vpminsd %ymm4,%ymm1,%ymm3 vpmaxsd %ymm4,%ymm1,%ymm1 vpminsd %ymm3,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm0,-64(%rdi) vmovdqu -160(%rdi),%ymm1 vmovdqu -416(%rdi),%ymm0 vmovdqu %ymm4,-448(%rdi) vmovdqu %ymm2,-320(%rdi) vmovdqu -32(%rdi),%ymm4 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu -288(%rdi),%ymm1 vmovdqu %ymm3,-192(%rdi) vpminsd %ymm4,%ymm1,%ymm3 vpmaxsd %ymm4,%ymm1,%ymm1 vpminsd %ymm3,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm4,-416(%rdi) vmovdqu %ymm2,-288(%rdi) vmovdqu %ymm3,-160(%rdi) vmovdqu %ymm0,-32(%rdi) cmp %rdx,%rax jne .L34 vzeroupper .L33: ret .endfn int32_twostages_32,globl .p2align 4 int32_threestages: push %rbp mov %rsp,%rbp push %r15 push %r14 lea 0(,%rdx,8),%r14 push %r13 push %r12 push %rbx and $-32,%rsp sub $32,%rsp mov %rsi,16(%rsp) cmp %r14,%rsi jl .L41 lea -1(%rdx),%rax and $-8,%rax lea (%rdx,%rdx),%r8 mov %rax,8(%rsp) lea (%r8,%rdx),%rcx lea 0(,%rdx,4),%rsi mov %r14,%r9 mov %rdi,%r13 lea (%rsi,%rdx),%r11 lea (%rcx,%rcx),%r10 sub %rdx,%r9 xor %r12d,%r12d mov %r14,%rbx lea 32(%rdi),%r15 .p2align 4,,10 .p2align 3 .L37: mov %r12,%rdi lea (%rdx,%rdi),%rax mov %rbx,24(%rsp) mov %rbx,%r12 cmp %rax,%rdi jge .L40 lea 0(%r13,%rdi,4),%rax add 8(%rsp),%rdi lea (%r15,%rdi,4),%rdi .p2align 4,,10 .p2align 3 .L38: vmovdqu (%rax,%rsi,4),%ymm0 vmovdqu (%rax),%ymm6 vmovdqu (%rax,%rdx,4),%ymm1 vpminsd %ymm0,%ymm6,%ymm7 vpmaxsd %ymm0,%ymm6,%ymm6 vmovdqu (%rax,%r11,4),%ymm0 vmovdqu (%rax,%r9,4),%ymm8 vpmaxsd %ymm0,%ymm1,%ymm3 vpminsd %ymm0,%ymm1,%ymm2 vmovdqu (%rax,%r10,4),%ymm1 vmovdqu (%rax,%r8,4),%ymm0 vpminsd %ymm1,%ymm0,%ymm4 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu (%rax,%rcx,4),%ymm1 vpminsd %ymm8,%ymm1,%ymm5 vpmaxsd %ymm8,%ymm1,%ymm1 vpminsd %ymm4,%ymm7,%ymm8 vpmaxsd %ymm4,%ymm7,%ymm4 vpminsd %ymm5,%ymm2,%ymm7 vpmaxsd %ymm5,%ymm2,%ymm2 vpminsd %ymm0,%ymm6,%ymm5 vpmaxsd %ymm0,%ymm6,%ymm0 vpminsd %ymm1,%ymm3,%ymm6 vpmaxsd %ymm1,%ymm3,%ymm1 vpminsd %ymm7,%ymm8,%ymm9 vpmaxsd %ymm7,%ymm8,%ymm3 vpminsd %ymm2,%ymm4,%ymm8 vpminsd %ymm6,%ymm5,%ymm7 vpmaxsd %ymm2,%ymm4,%ymm2 vpmaxsd %ymm6,%ymm5,%ymm5 vpminsd %ymm1,%ymm0,%ymm4 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm9,(%rax) vmovdqu %ymm3,(%rax,%rdx,4) vmovdqu %ymm8,(%rax,%r8,4) vmovdqu %ymm2,(%rax,%rcx,4) vmovdqu %ymm7,(%rax,%rsi,4) vmovdqu %ymm5,(%rax,%r11,4) vmovdqu %ymm4,(%rax,%r10,4) vmovdqu %ymm0,(%rax,%r9,4) add $32,%rax cmp %rax,%rdi jne .L38 .L40: add %r14,%rbx cmp %rbx,16(%rsp) jge .L37 vzeroupper .L35: mov 24(%rsp),%rax lea -40(%rbp),%rsp pop %rbx pop %r12 pop %r13 pop %r14 pop %r15 pop %rbp ret .L41: movq $0,24(%rsp) jmp .L35 .endfn int32_threestages,globl .p2align 4 merge16_finish: vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vperm2i128 $32,%ymm0,%ymm3,%ymm2 vperm2i128 $49,%ymm0,%ymm3,%ymm0 vpminsd %ymm0,%ymm2,%ymm1 vpmaxsd %ymm0,%ymm2,%ymm0 vpunpcklqdq %ymm0,%ymm1,%ymm2 vpunpckhqdq %ymm0,%ymm1,%ymm0 vpminsd %ymm0,%ymm2,%ymm1 vpmaxsd %ymm0,%ymm2,%ymm2 vpunpckldq %ymm2,%ymm1,%ymm0 vpunpckhdq %ymm2,%ymm1,%ymm1 vpunpcklqdq %ymm1,%ymm0,%ymm3 vpunpckhqdq %ymm1,%ymm0,%ymm0 vpminsd %ymm3,%ymm0,%ymm2 vpmaxsd %ymm3,%ymm0,%ymm0 vpunpckldq %ymm0,%ymm2,%ymm1 vpunpckhdq %ymm0,%ymm2,%ymm0 vperm2i128 $32,%ymm0,%ymm1,%ymm2 vperm2i128 $49,%ymm0,%ymm1,%ymm0 test %esi,%esi je .L46 vpcmpeqd %ymm1,%ymm1,%ymm1 vpxor %ymm1,%ymm2,%ymm2 vpxor %ymm1,%ymm0,%ymm0 .L46: vmovdqu %ymm2,(%rdi) vmovdqu 
%ymm0,32(%rdi) ret .endfn merge16_finish,globl .p2align 4 djbsort$avx2_2power: push %r13 mov %rdi,%r11 lea 16(%rsp),%r13 and $-32,%rsp push -8(%r13) push %rbp mov %rsp,%rbp push %r15 push %r14 push %r13 push %r12 push %rbx sub $200,%rsp mov %rsi,-144(%rbp) mov %edx,-164(%rbp) cmp $8,%rsi je .L194 cmpq $16,-144(%rbp) je .L195 cmpq $32,-144(%rbp) je .L196 mov %rsi,%r15 sar $3,%r15 test %r15,%r15 jle .L197 lea -1(%r15),%rbx mov %rbx,-200(%rbp) shr $3,%rbx mov %rbx,%rdx lea 32(%r11),%rbx lea (%r15,%r15),%r8 mov %rbx,-120(%rbp) lea 0(,%r15,4),%rsi lea (%r8,%r15),%rdi lea 0(,%r15,8),%rcx sal $5,%rdx lea (%rdi,%rdi),%r10 lea (%rsi,%r15),%r9 sub %r15,%rcx mov %r11,%rax add %rbx,%rdx .L61: vmovdqu (%rax),%ymm0 vmovdqu (%rax,%rsi,4),%ymm2 vmovdqu (%rax,%r10,4),%ymm3 vpminsd %ymm2,%ymm0,%ymm4 vpmaxsd %ymm2,%ymm0,%ymm2 vmovdqu (%rax,%r8,4),%ymm0 vpminsd %ymm3,%ymm0,%ymm1 vpmaxsd %ymm3,%ymm0,%ymm0 vpminsd %ymm2,%ymm0,%ymm3 vpmaxsd %ymm2,%ymm0,%ymm0 vpminsd %ymm4,%ymm1,%ymm2 vpmaxsd %ymm4,%ymm1,%ymm1 vpminsd %ymm1,%ymm3,%ymm4 vpmaxsd %ymm1,%ymm3,%ymm1 vmovdqu %ymm0,(%rax) vmovdqu %ymm4,(%rax,%r8,4) vmovdqu %ymm1,(%rax,%rsi,4) vmovdqu %ymm2,(%rax,%r10,4) vmovdqu (%rax,%r15,4),%ymm2 vmovdqu (%rax,%r9,4),%ymm0 vmovdqu (%rax,%rdi,4),%ymm4 vpminsd %ymm2,%ymm0,%ymm1 vpmaxsd %ymm2,%ymm0,%ymm0 vmovdqu (%rax,%rcx,4),%ymm2 vpminsd %ymm4,%ymm2,%ymm3 vpmaxsd %ymm4,%ymm2,%ymm2 vpminsd %ymm3,%ymm1,%ymm4 vpmaxsd %ymm3,%ymm1,%ymm1 vpminsd %ymm2,%ymm0,%ymm3 vpmaxsd %ymm2,%ymm0,%ymm0 vpminsd %ymm1,%ymm3,%ymm2 vpmaxsd %ymm1,%ymm3,%ymm1 vmovdqu %ymm4,(%rax,%r15,4) vmovdqu %ymm1,(%rax,%rdi,4) vmovdqu %ymm2,(%rax,%r9,4) vmovdqu %ymm0,(%rax,%rcx,4) add $32,%rax cmp %rdx,%rax jne .L61 .L62: lea 0(,%r15,8),%rax sub %r15,%rax lea (%r15,%r15),%r12 mov %rax,%r9 mov -144(%rbp),%rax lea 0(,%r15,4),%rbx lea (%r12,%r15),%r13 lea (%rbx,%r15),%r10 lea (%r13,%r13),%r14 cmp $127,%rax jg .L59 lea 64(%r11),%rdi dec %rax mov %rdi,-192(%rbp) mov %rax,-176(%rbp) .L60: mov -144(%rbp),%rdi mov %r11,-208(%rbp) lea (%r11,%rdi,4),%rax mov %rax,-112(%rbp) mov %rdi,%rax sar $4,%rax cmp $32,%rax sete %dl cmp $127,%rax mov %rax,-80(%rbp) setg %al or %eax,%edx mov -176(%rbp),%rax mov %dl,-152(%rbp) shr $4,%rax sal $6,%rax add -192(%rbp),%rax mov %rax,-128(%rbp) mov -200(%rbp),%rax movl $3,-184(%rbp) shr $3,%rax sal $5,%rax add -120(%rbp),%rax mov %rax,-160(%rbp) movq $4,-136(%rbp) mov %r12,-200(%rbp) mov %r13,-216(%rbp) mov %r10,-224(%rbp) mov %r9,-232(%rbp) vmovdqa .LC1(%rip),%ymm11 vmovdqa .LC3(%rip),%ymm10 vmovdqa .LC2(%rip),%ymm12 mov %rbx,-192(%rbp) mov %rdi,%rbx .L63: cmpq $4,-136(%rbp) je .L198 cmpq $2,-136(%rbp) je .L91 mov -112(%rbp),%rdx mov %r11,%rax cmp -112(%rbp),%r11 je .L90 .L92: vpxor 32(%rax),%ymm10,%ymm2 vpxor (%rax),%ymm10,%ymm1 add $64,%rax vperm2i128 $32,%ymm2,%ymm1,%ymm0 vperm2i128 $49,%ymm2,%ymm1,%ymm1 vpunpcklqdq %ymm1,%ymm0,%ymm2 vpunpckhqdq %ymm1,%ymm0,%ymm0 vpminsd %ymm0,%ymm2,%ymm1 vpmaxsd %ymm0,%ymm2,%ymm2 vpunpcklqdq %ymm2,%ymm1,%ymm0 vpunpckhqdq %ymm2,%ymm1,%ymm1 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vperm2i128 $32,%ymm0,%ymm2,%ymm1 vperm2i128 $49,%ymm0,%ymm2,%ymm0 vmovdqu %ymm1,-64(%rax) vmovdqu %ymm0,-32(%rax) cmp %rax,%rdx jne .L92 .L90: cmpb $0,-152(%rbp) mov -80(%rbp),%r12 je .L89 mov %rbx,%r13 mov %r11,%rbx .p2align 4,,10 .p2align 3 .L146: mov %r12,%rdx sar $2,%rdx mov %r13,%rsi mov %rbx,%rdi vzeroupper sar $3,%r12 call int32_threestages cmp $127,%r12 vmovdqa .LC1(%rip),%ymm11 vmovdqa .LC3(%rip),%ymm10 vmovdqa .LC2(%rip),%ymm12 jg .L146 cmp $32,%r12 je .L146 mov %rbx,%r11 mov %r13,%rbx .L89: cmp 
$15,%r12 jle .L94 mov -120(%rbp),%r13 .p2align 4,,10 .p2align 3 .L100: mov %r12,%rdx sar %rdx test %rbx,%rbx jle .L95 lea (%rdx,%rdx),%rcx lea -1(%rdx),%r9 lea (%rcx,%rdx),%rsi lea 0(,%rdx,4),%r10 xor %r8d,%r8d and $-8,%r9 .p2align 4,,10 .p2align 3 .L96: lea (%rdx,%r8),%rax cmp %rax,%r8 jge .L99 lea (%r9,%r8),%rdi lea (%r11,%r8,4),%rax lea 0(%r13,%rdi,4),%rdi .p2align 4,,10 .p2align 3 .L97: vmovdqu (%rax,%rcx,4),%ymm1 vmovdqu (%rax),%ymm0 vmovdqu (%rax,%rsi,4),%ymm4 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu (%rax,%rdx,4),%ymm1 vpminsd %ymm4,%ymm1,%ymm3 vpmaxsd %ymm4,%ymm1,%ymm1 vpminsd %ymm3,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm4,(%rax) vmovdqu %ymm2,(%rax,%rdx,4) vmovdqu %ymm3,(%rax,%rcx,4) vmovdqu %ymm0,(%rax,%rsi,4) add $32,%rax cmp %rdi,%rax jne .L97 .L99: add %r10,%r8 cmp %r8,%rbx jg .L96 .L95: sar $2,%r12 cmp $15,%r12 jg .L100 .L94: cmp $8,%r12 je .L101 .L104: mov %r11,%rax test %r15,%r15 jle .L103 mov -160(%rbp),%r9 mov -192(%rbp),%rdx mov -200(%rbp),%rcx mov -216(%rbp),%rsi mov -224(%rbp),%rdi mov -232(%rbp),%r8 .p2align 4,,10 .p2align 3 .L102: vmovdqu (%rax,%r15,4),%ymm0 vmovdqu (%rax),%ymm1 vmovdqu (%rax,%rcx,4),%ymm2 vmovdqu (%rax,%rdi,4),%ymm4 vmovdqu (%rax,%rdx,4),%ymm7 vpminsd %ymm0,%ymm1,%ymm5 vpmaxsd %ymm0,%ymm1,%ymm1 vmovdqu (%rax,%rsi,4),%ymm0 vmovdqu (%rax,%r14,4),%ymm8 vpminsd %ymm0,%ymm2,%ymm3 vpmaxsd %ymm0,%ymm2,%ymm0 vpminsd %ymm4,%ymm7,%ymm2 vpmaxsd %ymm4,%ymm7,%ymm7 vmovdqu (%rax,%r8,4),%ymm4 vpminsd %ymm3,%ymm5,%ymm9 vpminsd %ymm4,%ymm8,%ymm6 vpmaxsd %ymm4,%ymm8,%ymm4 vpmaxsd %ymm3,%ymm5,%ymm5 vpminsd %ymm0,%ymm1,%ymm8 vpminsd %ymm6,%ymm2,%ymm3 vpmaxsd %ymm0,%ymm1,%ymm0 vpmaxsd %ymm6,%ymm2,%ymm1 vpminsd %ymm4,%ymm7,%ymm2 vpmaxsd %ymm4,%ymm7,%ymm4 vpminsd %ymm3,%ymm9,%ymm6 vpminsd %ymm2,%ymm8,%ymm7 vpmaxsd %ymm3,%ymm9,%ymm3 vpmaxsd %ymm2,%ymm8,%ymm2 vpminsd %ymm1,%ymm5,%ymm8 vpmaxsd %ymm1,%ymm5,%ymm1 vpminsd %ymm4,%ymm0,%ymm5 vpmaxsd %ymm4,%ymm0,%ymm0 vmovdqu %ymm6,(%rax) vmovdqu %ymm7,(%rax,%r15,4) vmovdqu %ymm8,(%rax,%rcx,4) vmovdqu %ymm5,(%rax,%rsi,4) vmovdqu %ymm3,(%rax,%rdx,4) vmovdqu %ymm2,(%rax,%rdi,4) vmovdqu %ymm1,(%rax,%r14,4) vmovdqu %ymm0,(%rax,%r8,4) add $32,%rax cmp %rax,%r9 jne .L102 .L103: sarq -136(%rbp) decl -184(%rbp) jne .L63 cmpq $0,-144(%rbp) jle .L113 mov -176(%rbp),%rax vpcmpeqd %ymm4,%ymm4,%ymm4 shr $6,%rax sal $8,%rax lea 256(%r11,%rax),%rdx mov %r11,%rax jmp .L112 .L199: vpxor %ymm4,%ymm7,%ymm7 vpxor %ymm4,%ymm2,%ymm2 vpxor %ymm4,%ymm1,%ymm1 vpxor %ymm4,%ymm0,%ymm0 .L111: vperm2i128 $32,%ymm5,%ymm9,%ymm11 vperm2i128 $32,%ymm6,%ymm10,%ymm3 vperm2i128 $32,%ymm1,%ymm7,%ymm12 vperm2i128 $32,%ymm0,%ymm2,%ymm8 vperm2i128 $49,%ymm6,%ymm10,%ymm6 vperm2i128 $49,%ymm5,%ymm9,%ymm9 vperm2i128 $49,%ymm1,%ymm7,%ymm1 vperm2i128 $49,%ymm0,%ymm2,%ymm0 vpminsd %ymm3,%ymm12,%ymm7 vpmaxsd %ymm11,%ymm8,%ymm2 vpminsd %ymm9,%ymm0,%ymm10 vpminsd %ymm6,%ymm1,%ymm5 vpmaxsd %ymm9,%ymm0,%ymm0 vpmaxsd %ymm3,%ymm12,%ymm3 vpmaxsd %ymm6,%ymm1,%ymm1 vpminsd %ymm11,%ymm8,%ymm12 vpminsd %ymm12,%ymm7,%ymm9 vpmaxsd %ymm12,%ymm7,%ymm6 vpminsd %ymm10,%ymm5,%ymm8 vpminsd %ymm2,%ymm3,%ymm7 vpmaxsd %ymm10,%ymm5,%ymm5 vpmaxsd %ymm2,%ymm3,%ymm3 vpminsd %ymm0,%ymm1,%ymm2 vpmaxsd %ymm0,%ymm1,%ymm1 vpminsd %ymm8,%ymm9,%ymm10 vpmaxsd %ymm5,%ymm6,%ymm0 vpmaxsd %ymm8,%ymm9,%ymm8 vpminsd %ymm2,%ymm7,%ymm9 vpmaxsd %ymm2,%ymm7,%ymm7 vpminsd %ymm5,%ymm6,%ymm2 vpminsd %ymm1,%ymm3,%ymm5 vpmaxsd %ymm1,%ymm3,%ymm3 vpunpckldq %ymm9,%ymm10,%ymm11 vpunpckhdq %ymm9,%ymm10,%ymm6 vpunpckldq %ymm7,%ymm8,%ymm1 vpunpckldq 
%ymm5,%ymm2,%ymm9 vpunpckldq %ymm3,%ymm0,%ymm10 vpunpckhdq %ymm5,%ymm2,%ymm2 vpunpckhdq %ymm3,%ymm0,%ymm0 vpunpckhdq %ymm7,%ymm8,%ymm5 vpunpcklqdq %ymm9,%ymm11,%ymm3 vpunpcklqdq %ymm2,%ymm6,%ymm8 vpunpckhqdq %ymm9,%ymm11,%ymm7 vpunpckhqdq %ymm2,%ymm6,%ymm6 vpunpcklqdq %ymm0,%ymm5,%ymm9 vpunpcklqdq %ymm10,%ymm1,%ymm2 vpunpckhqdq %ymm0,%ymm5,%ymm0 vpunpckhqdq %ymm10,%ymm1,%ymm1 vperm2i128 $32,%ymm2,%ymm3,%ymm12 vperm2i128 $32,%ymm1,%ymm7,%ymm11 vperm2i128 $32,%ymm0,%ymm6,%ymm5 vperm2i128 $49,%ymm2,%ymm3,%ymm3 vperm2i128 $32,%ymm9,%ymm8,%ymm10 vperm2i128 $49,%ymm1,%ymm7,%ymm2 vperm2i128 $49,%ymm0,%ymm6,%ymm0 vperm2i128 $49,%ymm9,%ymm8,%ymm1 vmovdqu %ymm12,(%rax) vmovdqu %ymm11,32(%rax) vmovdqu %ymm10,64(%rax) vmovdqu %ymm5,96(%rax) vmovdqu %ymm3,128(%rax) vmovdqu %ymm2,160(%rax) vmovdqu %ymm1,192(%rax) vmovdqu %ymm0,224(%rax) add $256,%rax cmp %rdx,%rax je .L113 .L112: vmovdqu 32(%rax),%ymm0 vmovdqu (%rax),%ymm2 vmovdqu 128(%rax),%ymm3 vpunpckhdq %ymm0,%ymm2,%ymm5 vpunpckldq %ymm0,%ymm2,%ymm7 vmovdqu 96(%rax),%ymm0 vmovdqu 64(%rax),%ymm2 vmovdqu 224(%rax),%ymm9 vpunpckldq %ymm0,%ymm2,%ymm6 vpunpckhdq %ymm0,%ymm2,%ymm2 vmovdqu 160(%rax),%ymm0 mov -164(%rbp),%ebx vpunpckldq %ymm0,%ymm3,%ymm1 vpunpckhdq %ymm0,%ymm3,%ymm0 vmovdqu 192(%rax),%ymm3 vpunpcklqdq %ymm6,%ymm7,%ymm10 vpunpckldq %ymm9,%ymm3,%ymm8 vpunpckhdq %ymm9,%ymm3,%ymm3 vpunpckhqdq %ymm6,%ymm7,%ymm7 vpunpcklqdq %ymm2,%ymm5,%ymm9 vpunpcklqdq %ymm8,%ymm1,%ymm6 vpunpckhqdq %ymm2,%ymm5,%ymm2 vpunpckhqdq %ymm8,%ymm1,%ymm1 vpunpcklqdq %ymm3,%ymm0,%ymm5 vpunpckhqdq %ymm3,%ymm0,%ymm0 test %ebx,%ebx jne .L199 vpxor %ymm4,%ymm10,%ymm10 vpxor %ymm4,%ymm9,%ymm9 vpxor %ymm4,%ymm6,%ymm6 vpxor %ymm4,%ymm5,%ymm5 jmp .L111 .L91: mov -112(%rbp),%rdx cmp %rdx,%r11 je .L90 mov %r11,%rax .L93: vpxor 32(%rax),%ymm11,%ymm2 vpxor (%rax),%ymm11,%ymm1 add $64,%rax vperm2i128 $32,%ymm2,%ymm1,%ymm0 vperm2i128 $49,%ymm2,%ymm1,%ymm1 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vperm2i128 $32,%ymm0,%ymm2,%ymm1 vperm2i128 $49,%ymm0,%ymm2,%ymm0 vmovdqu %ymm1,-64(%rax) vmovdqu %ymm0,-32(%rax) cmp %rax,%rdx jne .L93 jmp .L90 .L101: test %rbx,%rbx jle .L104 mov %r11,%rax .L105: vmovdqu 32(%rax),%ymm1 vmovdqu (%rax),%ymm0 add $64,%rax vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm2,-64(%rax) vmovdqu %ymm0,-32(%rax) cmp %rax,-128(%rbp) jne .L105 jmp .L104 .L198: mov %r11,%rax cmp -112(%rbp),%r11 je .L90 .L87: vpxor 32(%rax),%ymm12,%ymm0 vpxor (%rax),%ymm12,%ymm1 vmovdqu %ymm0,32(%rax) vmovdqu %ymm1,(%rax) add $64,%rax cmp %rax,-112(%rbp) jne .L87 jmp .L90 .L113: cmpb $0,-152(%rbp) mov -80(%rbp),%r13 je .L109 mov %r15,-112(%rbp) mov -120(%rbp),%r15 .L145: mov -80(%rbp),%rdx sar $2,%rdx cmpq $0,-144(%rbp) jle .L114 lea (%rdx,%rdx),%rdi lea 0(,%rdx,8),%r14 lea (%rdi,%rdx),%rcx lea 0(,%rdx,4),%rsi mov %r14,%r8 lea -1(%rdx),%r13 lea (%rsi,%rdx),%r10 lea (%rcx,%rcx),%r9 sub %rdx,%r8 xor %r12d,%r12d and $-8,%r13 .p2align 4,,10 .p2align 3 .L115: lea (%rdx,%r12),%rax cmp %rax,%r12 jge .L118 lea 0(%r13,%r12),%rbx lea (%r11,%r12,4),%rax lea (%r15,%rbx,4),%rbx .p2align 4,,10 .p2align 3 .L116: vmovdqu (%rax,%rsi,4),%ymm0 vmovdqu (%rax),%ymm6 vmovdqu (%rax,%rdx,4),%ymm1 vpminsd %ymm0,%ymm6,%ymm7 vpmaxsd %ymm0,%ymm6,%ymm6 vmovdqu (%rax,%r10,4),%ymm0 vmovdqu (%rax,%r8,4),%ymm8 vpmaxsd %ymm0,%ymm1,%ymm3 vpminsd %ymm0,%ymm1,%ymm2 vmovdqu (%rax,%r9,4),%ymm1 vmovdqu (%rax,%rdi,4),%ymm0 vpminsd %ymm1,%ymm0,%ymm4 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu (%rax,%rcx,4),%ymm1 vpminsd %ymm8,%ymm1,%ymm5 vpmaxsd %ymm8,%ymm1,%ymm1 vpminsd %ymm4,%ymm7,%ymm8 vpmaxsd %ymm4,%ymm7,%ymm4 
vpminsd %ymm5,%ymm2,%ymm7 vpmaxsd %ymm5,%ymm2,%ymm2 vpminsd %ymm0,%ymm6,%ymm5 vpmaxsd %ymm0,%ymm6,%ymm0 vpminsd %ymm1,%ymm3,%ymm6 vpmaxsd %ymm1,%ymm3,%ymm1 vpminsd %ymm7,%ymm8,%ymm9 vpmaxsd %ymm7,%ymm8,%ymm3 vpminsd %ymm2,%ymm4,%ymm8 vpminsd %ymm6,%ymm5,%ymm7 vpmaxsd %ymm2,%ymm4,%ymm2 vpmaxsd %ymm6,%ymm5,%ymm5 vpminsd %ymm1,%ymm0,%ymm4 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm9,(%rax) vmovdqu %ymm3,(%rax,%rdx,4) vmovdqu %ymm8,(%rax,%rdi,4) vmovdqu %ymm2,(%rax,%rcx,4) vmovdqu %ymm7,(%rax,%rsi,4) vmovdqu %ymm5,(%rax,%r10,4) vmovdqu %ymm4,(%rax,%r9,4) vmovdqu %ymm0,(%rax,%r8,4) add $32,%rax cmp %rbx,%rax jne .L116 .L118: add %r14,%r12 cmp %r12,-144(%rbp) jg .L115 .L114: sarq $3,-80(%rbp) mov -80(%rbp),%rax cmp $127,%rax jg .L145 cmp $32,%rax je .L145 mov -112(%rbp),%r15 mov %rax,%r13 .L109: cmp $15,%r13 jle .L119 mov -144(%rbp),%r10 mov -120(%rbp),%r12 .L125: mov %r13,%rdx sar %rdx test %r10,%r10 jle .L120 lea (%rdx,%rdx),%rcx lea -1(%rdx),%r9 lea (%rcx,%rdx),%rsi lea 0(,%rdx,4),%rbx xor %r8d,%r8d and $-8,%r9 .p2align 4,,10 .p2align 3 .L121: lea (%rdx,%r8),%rax cmp %rax,%r8 jge .L124 lea (%r9,%r8),%rdi lea (%r11,%r8,4),%rax lea (%r12,%rdi,4),%rdi .p2align 4,,10 .p2align 3 .L122: vmovdqu (%rax,%rcx,4),%ymm1 vmovdqu (%rax),%ymm0 vmovdqu (%rax,%rsi,4),%ymm4 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu (%rax,%rdx,4),%ymm1 vpminsd %ymm4,%ymm1,%ymm3 vpmaxsd %ymm4,%ymm1,%ymm1 vpminsd %ymm3,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm4,(%rax) vmovdqu %ymm2,(%rax,%rdx,4) vmovdqu %ymm3,(%rax,%rcx,4) vmovdqu %ymm0,(%rax,%rsi,4) add $32,%rax cmp %rax,%rdi jne .L122 .L124: add %rbx,%r8 cmp %r8,%r10 jg .L121 .L120: sar $2,%r13 cmp $15,%r13 jg .L125 mov %r13,-80(%rbp) .L119: cmpq $8,-80(%rbp) je .L126 .L129: test %r15,%r15 jle .L192 lea (%r15,%r15),%rsi lea (%rsi,%r15),%rdx lea 0(,%r15,4),%rcx lea 0(,%r15,8),%rax mov -208(%rbp),%r9 lea (%rcx,%r15),%r8 lea (%rdx,%rdx),%rdi sub %r15,%rax vpcmpeqd %ymm6,%ymm6,%ymm6 .L132: vmovdqu (%r9,%r15,4),%ymm1 vmovdqu (%r9),%ymm0 vmovdqu (%r9,%r8,4),%ymm8 vpmaxsd %ymm0,%ymm1,%ymm4 vpminsd %ymm0,%ymm1,%ymm5 vmovdqu (%r9,%rdx,4),%ymm0 vmovdqu (%r9,%rsi,4),%ymm1 vmovdqu (%r9,%rdi,4),%ymm7 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm1 vmovdqu (%r9,%rcx,4),%ymm0 mov -164(%rbp),%r10d vpminsd %ymm0,%ymm8,%ymm2 vpmaxsd %ymm0,%ymm8,%ymm8 vmovdqu (%r9,%rax,4),%ymm0 vpminsd %ymm7,%ymm0,%ymm10 vpmaxsd %ymm7,%ymm0,%ymm0 vpminsd %ymm10,%ymm2,%ymm9 vpminsd %ymm3,%ymm5,%ymm7 vpmaxsd %ymm10,%ymm2,%ymm2 vpmaxsd %ymm3,%ymm5,%ymm5 vpminsd %ymm1,%ymm4,%ymm3 vpmaxsd %ymm1,%ymm4,%ymm1 vpminsd %ymm0,%ymm8,%ymm4 vpmaxsd %ymm0,%ymm8,%ymm8 vpminsd %ymm4,%ymm3,%ymm11 vpminsd %ymm9,%ymm7,%ymm0 vpmaxsd %ymm4,%ymm3,%ymm3 vpmaxsd %ymm9,%ymm7,%ymm7 vpminsd %ymm8,%ymm1,%ymm4 vpminsd %ymm2,%ymm5,%ymm9 vpmaxsd %ymm8,%ymm1,%ymm1 vpmaxsd %ymm2,%ymm5,%ymm2 vpunpckldq %ymm3,%ymm11,%ymm10 vpunpckhdq %ymm2,%ymm9,%ymm5 vpunpckhdq %ymm3,%ymm11,%ymm3 vpunpckldq %ymm7,%ymm0,%ymm8 vpunpckldq %ymm2,%ymm9,%ymm11 vpunpckhdq %ymm7,%ymm0,%ymm0 vpunpckldq %ymm1,%ymm4,%ymm9 vpunpckhdq %ymm1,%ymm4,%ymm4 vpunpcklqdq %ymm5,%ymm0,%ymm2 vpunpcklqdq %ymm9,%ymm10,%ymm13 vpunpcklqdq %ymm4,%ymm3,%ymm12 vpunpcklqdq %ymm11,%ymm8,%ymm7 vpunpckhqdq %ymm9,%ymm10,%ymm1 vpunpckhqdq %ymm11,%ymm8,%ymm8 vpunpckhqdq %ymm4,%ymm3,%ymm4 vpunpckhqdq %ymm5,%ymm0,%ymm0 vperm2i128 $32,%ymm12,%ymm2,%ymm10 vperm2i128 $32,%ymm1,%ymm8,%ymm9 vperm2i128 $32,%ymm4,%ymm0,%ymm5 vperm2i128 $32,%ymm13,%ymm7,%ymm11 vperm2i128 $49,%ymm13,%ymm7,%ymm3 vperm2i128 $49,%ymm12,%ymm2,%ymm2 
vperm2i128 $49,%ymm1,%ymm8,%ymm1 vperm2i128 $49,%ymm4,%ymm0,%ymm0 test %r10d,%r10d je .L131 vpxor %ymm6,%ymm11,%ymm11 vpxor %ymm6,%ymm10,%ymm10 vpxor %ymm6,%ymm9,%ymm9 vpxor %ymm6,%ymm5,%ymm5 vpxor %ymm6,%ymm3,%ymm3 vpxor %ymm6,%ymm2,%ymm2 vpxor %ymm6,%ymm1,%ymm1 vpxor %ymm6,%ymm0,%ymm0 .L131: vmovdqu %ymm11,(%r9) vmovdqu %ymm3,(%r9,%r15,4) vmovdqu %ymm10,(%r9,%rsi,4) vmovdqu %ymm2,(%r9,%rdx,4) vmovdqu %ymm9,(%r9,%rcx,4) vmovdqu %ymm1,(%r9,%r8,4) vmovdqu %ymm5,(%r9,%rdi,4) vmovdqu %ymm0,(%r9,%rax,4) add $32,%r9 cmp %r9,-160(%rbp) jne .L132 .L192: vzeroupper .L190: add $200,%rsp pop %rbx pop %r12 pop %r13 pop %r14 pop %r15 pop %rbp lea -16(%r13),%rsp pop %r13 ret .L59: dec %rax mov %rax,-176(%rbp) shr $5,%rax sal $7,%rax lea 128(%r11,%rax),%rax mov %rax,-184(%rbp) vpcmpeqd %ymm0,%ymm0,%ymm0 mov %r11,%rax .L64: vpxor 64(%rax),%ymm0,%ymm1 vpxor (%rax),%ymm0,%ymm2 vmovdqu %ymm1,64(%rax) vmovdqu %ymm2,(%rax) sub $-128,%rax cmp -184(%rbp),%rax jne .L64 mov -176(%rbp),%rdi lea 64(%r11),%rsi mov %rdi,%rax shr $4,%rax sal $6,%rax add %rsi,%rax mov %rax,-208(%rbp) mov %rdi,%rax shr $6,%rax sal $8,%rax lea 256(%r11,%rax),%rax mov $4,%ecx mov %r14,%r8 mov %rsi,-192(%rbp) mov %rax,-216(%rbp) movq $8,-112(%rbp) vpcmpeqd %ymm11,%ymm11,%ymm11 mov %r10,%r14 cmp $64,%rcx je .L200 .L68: cmp $32,%rcx je .L201 cmp $16,%rcx je .L74 cmp $8,%rcx je .L202 .L76: mov -112(%rbp),%rdi xor %edx,%edx lea (%rdi,%rdi),%rax cmp %r15,%rax mov %rax,-152(%rbp) setne %al movzbl %al,%eax mov %eax,-160(%rbp) lea -1(%rdi),%rax sete %dl and $-8,%rax movq $0,-136(%rbp) mov %rax,-128(%rbp) mov %rdi,%r10 test %r15,%r15 jle .L73 .L78: mov -112(%rbp),%rax mov -136(%rbp),%rdi add %r10,%rax cmp %rdi,%rax jle .L81 mov %rdi,%rsi .p2align 4,,10 .p2align 3 .L84: mov %rsi,%rcx mov %rsi,%rdi add -112(%rbp),%rsi cmp %rsi,%rcx jge .L83 lea (%r11,%rcx,4),%rax mov %rax,-80(%rbp) mov -120(%rbp),%rax add -128(%rbp),%rcx lea (%rax,%rcx,4),%rcx mov -80(%rbp),%rax .p2align 4,,10 .p2align 3 .L80: vmovdqu (%rax),%ymm0 vmovdqu (%rax,%r15,4),%ymm15 vmovdqu (%rax,%r13,4),%ymm7 vpminsd %ymm0,%ymm15,%ymm6 vpmaxsd %ymm0,%ymm15,%ymm15 vmovdqu (%rax,%r12,4),%ymm0 vmovdqu (%rax,%r14,4),%ymm5 vpminsd %ymm0,%ymm7,%ymm1 vpmaxsd %ymm0,%ymm7,%ymm7 vmovdqu (%rax,%rbx,4),%ymm0 vmovdqu (%rax,%r9,4),%ymm4 vpminsd %ymm0,%ymm5,%ymm9 vpmaxsd %ymm0,%ymm5,%ymm5 vmovdqu (%rax,%r8,4),%ymm0 vpminsd %ymm1,%ymm6,%ymm8 vpminsd %ymm0,%ymm4,%ymm3 vpmaxsd %ymm0,%ymm4,%ymm4 vpminsd %ymm3,%ymm9,%ymm2 vpmaxsd %ymm4,%ymm5,%ymm0 vpmaxsd %ymm3,%ymm9,%ymm3 vpmaxsd %ymm1,%ymm6,%ymm6 vpminsd %ymm7,%ymm15,%ymm1 vpmaxsd %ymm7,%ymm15,%ymm15 vpminsd %ymm4,%ymm5,%ymm7 vpminsd %ymm2,%ymm8,%ymm14 vpminsd %ymm7,%ymm1,%ymm13 vpminsd %ymm3,%ymm6,%ymm12 vpminsd %ymm0,%ymm15,%ymm10 vpmaxsd %ymm3,%ymm6,%ymm6 vpmaxsd %ymm2,%ymm8,%ymm2 vpmaxsd %ymm7,%ymm1,%ymm1 vpmaxsd %ymm0,%ymm15,%ymm0 vmovdqa %ymm6,-80(%rbp) vmovdqa %ymm6,%ymm3 vmovdqa %ymm14,%ymm9 vmovdqa %ymm2,%ymm5 vmovdqa %ymm13,%ymm8 vmovdqa %ymm1,%ymm4 vmovdqa %ymm12,%ymm7 vmovdqa %ymm10,%ymm6 vmovdqa %ymm0,%ymm15 test %edx,%edx je .L79 vpxor -80(%rbp),%ymm11,%ymm3 vpxor %ymm14,%ymm11,%ymm9 vpxor %ymm13,%ymm11,%ymm8 vpxor %ymm12,%ymm11,%ymm7 vpxor %ymm10,%ymm11,%ymm6 vpxor %ymm2,%ymm11,%ymm5 vpxor %ymm1,%ymm11,%ymm4 vpxor %ymm0,%ymm11,%ymm15 .L79: vmovdqu %ymm9,(%rax) vmovdqu %ymm8,(%rax,%r15,4) vmovdqu %ymm7,(%rax,%r12,4) vmovdqu %ymm6,(%rax,%r13,4) vmovdqu %ymm5,(%rax,%rbx,4) vmovdqu %ymm4,(%rax,%r14,4) vmovdqu %ymm3,(%rax,%r8,4) vmovdqu %ymm15,(%rax,%r9,4) add $32,%rax cmp %rax,%rcx jne .L80 .L83: xor $1,%edx cmp %rdi,%r10 jg .L84 .L81: mov 
-152(%rbp),%rdi xor -160(%rbp),%edx add %rdi,-136(%rbp) add %rdi,%r10 mov -136(%rbp),%rax cmp %rax,%r15 jg .L78 .L73: mov -112(%rbp),%rax sal $4,%rax cmp -144(%rbp),%rax je .L203 mov -152(%rbp),%rax mov %rax,%rcx sar %rcx cmp $254,%rax jle .L66 mov %r8,-136(%rbp) mov %r9,-160(%rbp) mov %r15,-80(%rbp) mov -144(%rbp),%r15 mov %rbx,-112(%rbp) mov %r12,-128(%rbp) mov %rcx,%rbx mov %r11,%r12 .L67: mov %rbx,%rdx sar $2,%rdx mov %r15,%rsi mov %r12,%rdi vzeroupper sar $3,%rbx call int32_threestages cmp $127,%rbx vpcmpeqd %ymm11,%ymm11,%ymm11 jg .L67 mov %rbx,%rcx mov %r12,%r11 mov -80(%rbp),%r15 mov -112(%rbp),%rbx mov -128(%rbp),%r12 mov -136(%rbp),%r8 mov -160(%rbp),%r9 .L66: mov -152(%rbp),%rax mov %rax,-112(%rbp) cmp $64,%rcx jne .L68 .L200: mov -144(%rbp),%rsi mov %r11,%rdi vzeroupper call int32_twostages_32 vpcmpeqd %ymm11,%ymm11,%ymm11 .L74: mov %r11,%rax .L69: vmovdqu 64(%rax),%ymm1 vmovdqu (%rax),%ymm0 vmovdqu 96(%rax),%ymm4 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu 32(%rax),%ymm1 sub $-128,%rax vpminsd %ymm4,%ymm1,%ymm3 vpmaxsd %ymm4,%ymm1,%ymm1 vpminsd %ymm3,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm4,-128(%rax) vmovdqu %ymm2,-96(%rax) vmovdqu %ymm3,-64(%rax) vmovdqu %ymm0,-32(%rax) cmp -184(%rbp),%rax jne .L69 jmp .L76 .L202: mov %r11,%rax .L77: vmovdqu 32(%rax),%ymm0 vmovdqu (%rax),%ymm1 add $64,%rax vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm2,-64(%rax) vmovdqu %ymm0,-32(%rax) cmp %rax,-208(%rbp) jne .L77 jmp .L76 .L203: mov %r14,%r10 mov %r8,%r14 jmp .L60 .L201: mov %r11,%rax .L71: vmovdqu 128(%rax),%ymm0 vmovdqu (%rax),%ymm6 vmovdqu 32(%rax),%ymm1 vpminsd %ymm0,%ymm6,%ymm7 vpmaxsd %ymm0,%ymm6,%ymm6 vmovdqu 160(%rax),%ymm0 vmovdqu 224(%rax),%ymm8 vpminsd %ymm0,%ymm1,%ymm5 vpmaxsd %ymm0,%ymm1,%ymm3 vmovdqu 192(%rax),%ymm1 vmovdqu 64(%rax),%ymm0 add $256,%rax vpminsd %ymm1,%ymm0,%ymm4 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu -160(%rax),%ymm1 vpminsd %ymm8,%ymm1,%ymm2 vpmaxsd %ymm8,%ymm1,%ymm1 vpminsd %ymm4,%ymm7,%ymm8 vpmaxsd %ymm4,%ymm7,%ymm4 vpminsd %ymm2,%ymm5,%ymm7 vpmaxsd %ymm2,%ymm5,%ymm2 vpminsd %ymm0,%ymm6,%ymm5 vpmaxsd %ymm0,%ymm6,%ymm0 vpminsd %ymm1,%ymm3,%ymm6 vpmaxsd %ymm1,%ymm3,%ymm1 vpminsd %ymm7,%ymm8,%ymm9 vpmaxsd %ymm7,%ymm8,%ymm3 vpminsd %ymm2,%ymm4,%ymm8 vpminsd %ymm6,%ymm5,%ymm7 vpmaxsd %ymm2,%ymm4,%ymm2 vpmaxsd %ymm6,%ymm5,%ymm5 vpminsd %ymm1,%ymm0,%ymm4 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm9,-256(%rax) vmovdqu %ymm3,-224(%rax) vmovdqu %ymm8,-192(%rax) vmovdqu %ymm2,-160(%rax) vmovdqu %ymm7,-128(%rax) vmovdqu %ymm5,-96(%rax) vmovdqu %ymm4,-64(%rax) vmovdqu %ymm0,-32(%rax) cmp %rax,-216(%rbp) jne .L71 jmp .L76 .L194: mov 4(%rdi),%eax mov 12(%rdi),%ebx cmp (%rdi),%eax cmovg (%rdi),%ecx cmovg %eax,%r9d cmp 8(%rdi),%ebx cmovg 8(%rdi),%eax cmovg %ebx,%edi cmp %ecx,%eax cmovg %ecx,%r8d cmovg %eax,%eax cmp %r9d,%edi cmovg %r9d,%edx cmovg %edi,%r9d cmp %eax,%edx cmovg %eax,%r12d cmovg %edx,%r10d mov 20(%r11),%eax cmp 16(%r11),%eax cmovg 16(%r11),%esi cmovg %eax,%ecx mov 28(%r11),%eax cmp 24(%r11),%eax cmovg 24(%r11),%edx cmovg %eax,%edi cmp %ecx,%edi cmovg %ecx,%eax cmovg %edi,%edi cmp %esi,%edx cmovg %esi,%ecx cmovg %edx,%edx cmp %r9d,%edi cmovg %r9d,%ebx cmovg %edi,%edi cmp %edx,%eax cmovg %edx,%esi cmovg %eax,%edx mov %edi,(%r11) cmp %r12d,%esi cmovg %r12d,%r9d cmovg %esi,%esi cmp %r10d,%edx cmovg %r10d,%eax cmovg %edx,%edx cmp %esi,%ebx cmovg %esi,%r13d cmovg %ebx,%ebx cmp %r8d,%ecx cmovg %r8d,%esi cmovg %ecx,%ecx cmp %ecx,%eax cmovg %ecx,%r8d cmovg %eax,%eax mov %esi,28(%r11) 
cmp %edx,%ebx cmovg %edx,%r12d cmovg %ebx,%ecx cmp %eax,%r13d cmovg %eax,%ebx cmovg %r13d,%edx mov %ecx,4(%r11) cmp %r8d,%r9d cmovg %r8d,%r10d cmovg %r9d,%eax mov %r12d,8(%r11) mov %edx,12(%r11) mov %ebx,16(%r11) mov %eax,20(%r11) mov %r10d,24(%r11) jmp .L190 .L126: cmpq $0,-144(%rbp) jle .L129 mov %r11,%rax .L130: vmovdqu 32(%rax),%ymm1 vmovdqu (%rax),%ymm0 add $64,%rax vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm2,-64(%rax) vmovdqu %ymm0,-32(%rax) cmp %rax,-128(%rbp) jne .L130 jmp .L129 .L195: vmovdqa .LC0(%rip),%ymm1 vmovdqa .LC1(%rip),%ymm3 vpxor 32(%rdi),%ymm1,%ymm2 vpxor (%rdi),%ymm1,%ymm1 mov -164(%rbp),%r14d vpunpckldq %ymm2,%ymm1,%ymm0 vpunpckhdq %ymm2,%ymm1,%ymm1 vpunpcklqdq %ymm1,%ymm0,%ymm2 vpunpckhqdq %ymm1,%ymm0,%ymm0 vpminsd %ymm2,%ymm0,%ymm1 vpmaxsd %ymm2,%ymm0,%ymm0 vpxor %ymm3,%ymm1,%ymm1 vpxor %ymm3,%ymm0,%ymm0 vpunpckldq %ymm0,%ymm1,%ymm4 vpunpckhdq %ymm0,%ymm1,%ymm0 vpmaxsd %ymm0,%ymm4,%ymm1 vpminsd %ymm0,%ymm4,%ymm2 vpunpcklqdq %ymm1,%ymm2,%ymm0 vpunpckhqdq %ymm1,%ymm2,%ymm2 vpunpckldq %ymm2,%ymm0,%ymm1 vpunpckhdq %ymm2,%ymm0,%ymm0 vpunpcklqdq %ymm0,%ymm1,%ymm4 vpunpckhqdq %ymm0,%ymm1,%ymm1 vpminsd %ymm4,%ymm1,%ymm2 vpmaxsd %ymm4,%ymm1,%ymm1 vpunpckldq %ymm1,%ymm2,%ymm0 vpunpckhdq %ymm1,%ymm2,%ymm1 vpxor %ymm3,%ymm1,%ymm1 vpxor %ymm3,%ymm0,%ymm0 vperm2i128 $32,%ymm1,%ymm0,%ymm2 vperm2i128 $49,%ymm1,%ymm0,%ymm0 vpminsd %ymm2,%ymm0,%ymm1 vpmaxsd %ymm2,%ymm0,%ymm0 vperm2i128 $32,%ymm0,%ymm1,%ymm3 vperm2i128 $49,%ymm0,%ymm1,%ymm0 vpminsd %ymm3,%ymm0,%ymm2 vpmaxsd %ymm3,%ymm0,%ymm0 vpunpcklqdq %ymm0,%ymm2,%ymm1 vpunpckhqdq %ymm0,%ymm2,%ymm2 vpunpckldq %ymm2,%ymm1,%ymm0 vpunpckhdq %ymm2,%ymm1,%ymm1 vpunpcklqdq %ymm1,%ymm0,%ymm3 vpunpckhqdq %ymm1,%ymm0,%ymm0 vpminsd %ymm3,%ymm0,%ymm2 vpmaxsd %ymm3,%ymm0,%ymm0 vpunpckldq %ymm0,%ymm2,%ymm1 vpunpckhdq %ymm0,%ymm2,%ymm0 vpunpcklqdq %ymm0,%ymm1,%ymm2 vpunpckhqdq %ymm0,%ymm1,%ymm1 vpcmpeqd %ymm0,%ymm0,%ymm0 test %r14d,%r14d je .L54 vpxor %ymm0,%ymm1,%ymm1 mov %edx,%esi .L55: vmovdqa %ymm2,%ymm0 mov %r11,%rdi .L193: add $200,%rsp pop %rbx pop %r12 pop %r13 pop %r14 pop %r15 pop %rbp lea -16(%r13),%rsp pop %r13 jmp merge16_finish .L197: lea -1(%r15),%rax mov %rax,-200(%rbp) lea 32(%rdi),%rax mov %rax,-120(%rbp) jmp .L62 .L196: mov $1,%edx mov $16,%esi mov %rdi,-80(%rbp) call djbsort$avx2_2power mov -80(%rbp),%r11 xor %edx,%edx lea 64(%r11),%r12 mov $16,%esi mov %r12,%rdi call djbsort$avx2_2power mov -80(%rbp),%r11 mov -164(%rbp),%r13d vmovdqu (%r11),%ymm4 vmovdqu 32(%r11),%ymm1 vmovdqu 64(%r11),%ymm2 vmovdqu 96(%r11),%ymm3 test %r13d,%r13d je .L57 vpcmpeqd %ymm0,%ymm0,%ymm0 vpxor %ymm0,%ymm4,%ymm4 vpxor %ymm0,%ymm1,%ymm1 vpxor %ymm0,%ymm2,%ymm2 vpxor %ymm0,%ymm3,%ymm3 .L57: mov -164(%rbp),%esi vpmaxsd %ymm1,%ymm3,%ymm5 vpminsd %ymm4,%ymm2,%ymm0 vpminsd %ymm1,%ymm3,%ymm1 vpmaxsd %ymm4,%ymm2,%ymm4 mov %r11,%rdi vmovdqa %ymm4,-112(%rbp) vmovdqa %ymm5,-80(%rbp) call merge16_finish vmovdqa -80(%rbp),%ymm5 vmovdqa -112(%rbp),%ymm4 vmovdqa %ymm5,%ymm1 vmovdqa %ymm4,%ymm0 mov %r12,%rdi jmp .L193 .L54: vpxor %ymm0,%ymm2,%ymm2 mov %edx,%esi jmp .L55 .endfn djbsort$avx2_2power,globl .p2align 4 djbsort$avx2: push %rbp mov %rsp,%rbp push %r15 mov %rdi,%r15 push %r14 mov %rsi,%r14 push %r13 push %r12 push %rbx and $-32,%rsp sub $1056,%rsp cmp $8,%rsi jle .L265 blsr %rsi,%rax je .L220 lea -8(%rsi),%rax mov %rax,8(%rsp) mov $8,%ebx cmp $8,%rax jle .L266 .p2align 4,,10 .p2align 3 .L221: mov %rbx,%rax mov %r14,%r12 add %rbx,%rbx sub %rbx,%r12 cmp %rbx,%r12 jg .L221 cmp $128,%rbx jle .L267 mov $1,%edx mov %rbx,%rsi mov %r15,%rdi 
call djbsort$avx2_2power lea (%r15,%rbx,4),%rdi mov %r12,%rsi call djbsort$avx2 lea 32(%r15),%rax mov %rax,16(%rsp) jmp .L230 .p2align 4,,10 .p2align 3 .L228: lea 0(%r13,%r12),%rdx add %r11,%r12 sub %r9,%rdx lea (%r15,%r12,4),%rsi mov %r8,%rdi sar $3,%rbx call minmax_vector cmp $63,%rbx jle .L268 .L230: mov %rbx,%r12 sar $2,%r12 mov %r12,%rdx mov %r14,%rsi mov %r15,%rdi call int32_threestages lea 0(,%r12,4),%rcx mov %r14,%rdx sub %rcx,%rdx lea (%rcx,%rax),%r13 lea (%r15,%rax,4),%r8 lea (%r15,%r13,4),%rsi sub %rax,%rdx mov %r8,%rdi mov %rsi,24(%rsp) mov %rax,%r9 mov %rax,%r11 call minmax_vector cmp %r14,%r13 mov 24(%rsp),%rsi lea (%r12,%r12),%r10 jle .L269 .L226: mov %r14,%r13 sub %r10,%r13 lea (%r11,%r12,2),%rax mov %r13,%rdx sub %r9,%rdx lea (%r15,%rax,4),%rsi mov %r8,%rdi call minmax_vector add %r9,%r10 cmp %r14,%r10 jg .L228 mov %r10,%rax sub %r12,%rax mov %r10,%r11 lea (%r15,%r10,4),%r8 cmp %rax,%r9 jge .L247 sub %r9,%rax dec %rax and $-8,%rax lea (%r15,%r9,4),%rdx add %rax,%r9 mov 16(%rsp),%rax lea (%rax,%r9,4),%rax .p2align 4,,10 .p2align 3 .L229: vmovdqu (%rdx,%r12,4),%ymm0 vmovdqu (%rdx),%ymm1 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm2,(%rdx) vmovdqu %ymm0,(%rdx,%r12,4) add $32,%rdx cmp %rdx,%rax jne .L229 mov %r10,%r9 vzeroupper jmp .L228 .p2align 4,,10 .p2align 3 .L267: mov %rbx,%rdx sar $2,%rdx sar $3,%rbx lea 0(,%rax,4),%r12 lea 32(%rsp),%r13 cmp %rbx,%rdx jle .L224 vmovdqa .LC4(%rip),%ymm0 .p2align 4,,10 .p2align 3 .L225: mov %rbx,%rax sal $5,%rax inc %rbx vmovdqa %ymm0,0(%r13,%rax) cmp %rdx,%rbx jl .L225 vzeroupper .L224: sal $2,%r14 mov %r14,%rdx mov %r15,%rsi mov %r13,%rdi call memcpy xor %edx,%edx mov %r12,%rsi mov %r13,%rdi call djbsort$avx2_2power mov %r14,%rdx mov %r13,%rsi mov %r15,%rdi call memcpy .L263: lea -40(%rbp),%rsp pop %rbx pop %r12 pop %r13 pop %r14 pop %r15 pop %rbp ret .p2align 4,,10 .p2align 3 .L269: lea (%r12,%r9),%rax cmp %rax,%r9 jge .L246 notq %r11 add %r11,%rax and $-8,%rax add %rax,%r9 mov 16(%rsp),%rax lea (%r10,%r12),%rdx mov %r8,%rdi lea (%rax,%r9,4),%rax .p2align 4,,10 .p2align 3 .L227: vmovdqu (%rdi),%ymm1 vmovdqu (%rdi,%r10,4),%ymm0 vmovdqu (%rdi,%r12,4),%ymm4 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu (%rdi,%rdx,4),%ymm1 vpminsd %ymm4,%ymm1,%ymm3 vpmaxsd %ymm4,%ymm1,%ymm1 vpminsd %ymm3,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm4,(%rdi) vmovdqu %ymm2,(%rdi,%r12,4) vmovdqu %ymm3,(%rdi,%r10,4) vmovdqu %ymm0,(%rdi,%rdx,4) add $32,%rdi cmp %rdi,%rax jne .L227 mov %rsi,%r8 mov %r13,%r11 mov %r13,%r9 vzeroupper jmp .L226 .p2align 4,,10 .p2align 3 .L268: cmp $32,%rbx je .L270 mov %r15,%r10 cmp $16,%rbx je .L249 mov $32,%ebx xor %r11d,%r11d mov $15,%eax xor %r9d,%r9d .L237: cmp %rax,%r14 jle .L239 mov %r9,%rax .p2align 4,,10 .p2align 3 .L240: vmovdqu 32(%r15,%rax,4),%ymm0 vmovdqu (%r15,%rax,4),%ymm2 mov %rax,%rdx vpminsd %ymm0,%ymm2,%ymm1 vpmaxsd %ymm0,%ymm2,%ymm2 vperm2i128 $32,%ymm2,%ymm1,%ymm0 vperm2i128 $49,%ymm2,%ymm1,%ymm1 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vperm2i128 $32,%ymm0,%ymm2,%ymm1 vperm2i128 $49,%ymm0,%ymm2,%ymm2 vpunpcklqdq %ymm2,%ymm1,%ymm0 vpunpckhqdq %ymm2,%ymm1,%ymm1 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vpunpckldq %ymm0,%ymm2,%ymm1 vpunpckhdq %ymm0,%ymm2,%ymm2 vpunpcklqdq %ymm2,%ymm1,%ymm0 vpunpckhqdq %ymm2,%ymm1,%ymm1 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vpunpckldq %ymm0,%ymm2,%ymm1 add $31,%rdx vpunpckhdq %ymm0,%ymm2,%ymm0 vmovdqu %ymm1,(%r15,%rax,4) vmovdqu %ymm0,32(%r15,%rax,4) add 
$16,%rax cmp %rdx,%r14 jg .L240 lea -16(%r14),%rax sub %r9,%rax lea 15(%r9),%rdx and $-16,%rax cmp %rdx,%r14 mov $0,%edx cmovle %rdx,%rax lea 16(%r9,%rax),%r9 mov %r9,%r11 lea 32(,%r9,4),%rbx lea (%r15,%r9,4),%r10 vzeroupper .L239: mov 8(%rsp),%rdx lea (%r15,%rbx),%rsi sub %r9,%rdx mov %r10,%rdi call minmax_vector lea 16(,%r11,4),%rax lea 7(%r9),%rdx lea (%r15,%rax),%rsi cmp %r14,%rdx jge .L241 mov (%r10),%ebx cmp (%rsi),%ebx cmovg (%rsi),%ecx cmovg %ebx,%edx mov %ecx,(%r10) mov %edx,(%rsi) lea -12(%r15,%rax),%rbx lea 4(%r15,%rax),%rdi mov (%rbx),%edx cmp (%rdi),%edx cmovg (%rdi),%ecx cmovg %edx,%edx mov %ecx,(%rbx) mov %edx,(%rdi) lea -8(%r15,%rax),%r11 lea 8(%r15,%rax),%rdx mov (%r11),%ecx cmp (%rdx),%ecx cmovg (%rdx),%r12d cmovg %ecx,%ecx mov %r12d,(%r11) mov %ecx,(%rdx) lea -4(%r15,%rax),%rcx lea 12(%r15,%rax),%rax mov (%rcx),%r13d cmp (%rax),%r13d cmovg (%rax),%r8d cmovg %r13d,%r13d mov %r8d,(%rcx) mov %r13d,(%rax) cmp %r12d,(%r10) cmovg %r12d,%r13d cmovg (%r10),%r12d mov %r13d,(%r10) mov %r12d,(%r11) mov (%rbx),%r8d cmp (%rcx),%r8d cmovg (%rcx),%r12d cmovg %r8d,%r13d mov %r12d,(%rbx) mov %r13d,(%rcx) cmp %r12d,(%r10) cmovg %r12d,%r13d cmovg (%r10),%r12d mov %r13d,(%r10) mov %r12d,(%rbx) mov (%r11),%r8d cmp (%rcx),%r8d cmovg (%rcx),%ebx cmovg %r8d,%r10d mov %ebx,(%r11) mov %r10d,(%rcx) lea 8(%r9),%r11 mov (%rsi),%ecx cmp (%rdx),%ecx cmovg (%rdx),%r10d cmovg %ecx,%ecx mov %r10d,(%rsi) mov %ecx,(%rdx) mov (%rdi),%ebx cmp (%rax),%ebx cmovg (%rax),%ecx cmovg %ebx,%r10d mov %ecx,(%rdi) mov %r10d,(%rax) cmp %ecx,(%rsi) cmovg %ecx,%r10d cmovg (%rsi),%ecx mov %r10d,(%rsi) mov %ecx,(%rdi) mov (%rdx),%ecx cmp (%rax),%ecx cmovg (%rax),%esi cmovg %ecx,%ecx mov %esi,(%rdx) mov %ecx,(%rax) lea 48(,%r9,4),%rax lea (%r15,%rax),%rsi lea -16(%r15,%rax),%r10 mov %r11,%r9 .L241: lea -4(%r14),%rdx sub %r9,%rdx mov %r10,%rdi call minmax_vector lea 3(%r9),%rax cmp %r14,%rax jge .L242 lea 8(,%r11,4),%rax lea (%r15,%rax),%rdx mov (%r10),%ecx cmp (%rdx),%ecx cmovg (%rdx),%esi cmovg %ecx,%ecx mov %esi,(%r10) mov %ecx,(%rdx) lea -4(%r15,%rax),%rsi lea 4(%r15,%rax),%rax mov (%rsi),%ebx cmp (%rax),%ebx cmovg (%rax),%ecx cmovg %ebx,%edi mov %ecx,(%rsi) mov %edi,(%rax) cmp %ecx,(%r10) cmovg %ecx,%edi cmovg (%r10),%ecx mov %edi,(%r10) mov %ecx,(%rsi) add $4,%r9 mov (%rdx),%ecx cmp (%rax),%ecx cmovg (%rax),%esi cmovg %ecx,%ecx mov %esi,(%rdx) mov %ecx,(%rax) .L242: lea 2(%r9),%rax cmp %r14,%rax jge .L243 lea 0(,%r9,4),%rax lea (%r15,%rax),%rdx lea 8(%r15,%rax),%rax mov (%rdx),%ecx cmp (%rax),%ecx cmovg (%rax),%esi cmovg %ecx,%ecx mov %esi,(%rdx) mov %ecx,(%rax) .L243: lea 1(%r9),%rax cmp %r14,%rax jge .L263 sal $2,%r9 lea (%r15,%r9),%rdx lea 4(%r15,%r9),%rax mov (%rdx),%ecx cmp (%rax),%ecx cmovg (%rax),%esi cmovg %ecx,%ecx mov %esi,(%rdx) mov %ecx,(%rax) lea -40(%rbp),%rsp pop %rbx pop %r12 pop %r13 pop %r14 pop %r15 pop %rbp ret .p2align 4,,10 .p2align 3 .L265: je .L271 cmp $7,%rsi je .L272 cmp $6,%rsi je .L273 cmp $5,%rsi je .L274 cmp $4,%rsi je .L275 cmp $3,%rsi je .L276 cmp $2,%rsi jne .L263 mov (%rdi),%edx mov 4(%rdi),%ecx jmp .L217 .p2align 4,,10 .p2align 3 .L271: mov (%rdi),%ecx cmp 4(%rdi),%ecx cmovg 4(%rdi),%eax cmovg %ecx,%ecx mov %eax,%edx cmp 8(%rdi),%ecx cmovg 8(%rdi),%eax cmovg %ecx,%ecx mov %eax,%esi cmp 12(%rdi),%ecx cmovg 12(%rdi),%eax cmovg %ecx,%ecx mov %eax,%r9d cmp 16(%rdi),%ecx cmovg 16(%rdi),%eax cmovg %ecx,%ecx mov %eax,%edi cmp 20(%r15),%ecx cmovg 20(%r15),%eax cmovg %ecx,%ecx mov %eax,%r8d cmp 24(%r15),%ecx cmovg 24(%r15),%eax cmovg %ecx,%ecx mov %eax,%r10d cmp 28(%r15),%ecx cmovg 
28(%r15),%eax cmovg %ecx,%ecx mov %ecx,28(%r15) mov %eax,%r11d .L207: cmp %esi,%edx cmovg %esi,%ecx cmovg %edx,%eax cmp %r9d,%eax cmovg %r9d,%esi cmovg %eax,%eax cmp %edi,%eax cmovg %edi,%r9d cmovg %eax,%eax cmp %r8d,%eax cmovg %r8d,%edi cmovg %eax,%eax cmp %r10d,%eax cmovg %r10d,%r8d cmovg %eax,%eax cmp %r11d,%eax cmovg %r11d,%r10d cmovg %eax,%eax mov %eax,24(%r15) mov %ecx,%edx .L209: cmp %esi,%edx cmovg %esi,%ecx cmovg %edx,%edx cmp %r9d,%edx cmovg %r9d,%esi cmovg %edx,%eax cmp %edi,%eax cmovg %edi,%r9d cmovg %eax,%eax cmp %r8d,%eax cmovg %r8d,%edi cmovg %eax,%eax cmp %r10d,%eax cmovg %r10d,%r8d cmovg %eax,%eax mov %eax,20(%r15) .L211: cmp %esi,%ecx cmovg %esi,%edx cmovg %ecx,%ecx cmp %r9d,%ecx cmovg %r9d,%esi cmovg %ecx,%eax mov %esi,%ecx cmp %edi,%eax cmovg %edi,%esi cmovg %eax,%eax cmp %r8d,%eax cmovg %r8d,%edi cmovg %eax,%eax mov %eax,16(%r15) .L213: cmp %ecx,%edx cmovg %ecx,%eax cmovg %edx,%edx cmp %esi,%edx cmovg %esi,%ecx cmovg %edx,%edx cmp %edi,%edx cmovg %edi,%esi cmovg %edx,%edx mov %edx,12(%r15) .L215: cmp %ecx,%eax cmovg %ecx,%edx cmovg %eax,%eax cmp %esi,%eax cmovg %esi,%ecx cmovg %eax,%eax mov %eax,8(%r15) .L217: cmp %ecx,%edx cmovg %ecx,%eax cmovg %edx,%edx mov %eax,(%r15) mov %edx,4(%r15) lea -40(%rbp),%rsp pop %rbx pop %r12 pop %r13 pop %r14 pop %r15 pop %rbp ret .L249: mov $64,%r12d mov $32,%ebx xor %r11d,%r11d mov $31,%r8d xor %r9d,%r9d .L236: lea (%r15,%r9,4),%rax mov %r9,%rcx cmp %r8,%r14 jle .L235 .p2align 4,,10 .p2align 3 .L238: vmovdqu 64(%rax),%ymm1 vmovdqu 96(%rax),%ymm3 vmovdqu (%rax),%ymm0 vmovdqu 32(%rax),%ymm2 vpminsd %ymm1,%ymm0,%ymm5 vpmaxsd %ymm1,%ymm0,%ymm0 vpminsd %ymm3,%ymm2,%ymm1 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm2,%ymm0,%ymm4 vpminsd %ymm1,%ymm5,%ymm3 vpmaxsd %ymm2,%ymm0,%ymm0 vpmaxsd %ymm1,%ymm5,%ymm5 vperm2i128 $32,%ymm0,%ymm4,%ymm2 vperm2i128 $32,%ymm5,%ymm3,%ymm1 vperm2i128 $49,%ymm0,%ymm4,%ymm0 vperm2i128 $49,%ymm5,%ymm3,%ymm3 vpminsd %ymm3,%ymm1,%ymm5 vpminsd %ymm0,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm1,%ymm1 vpmaxsd %ymm0,%ymm2,%ymm0 vperm2i128 $32,%ymm1,%ymm5,%ymm3 vperm2i128 $32,%ymm0,%ymm4,%ymm2 vperm2i128 $49,%ymm1,%ymm5,%ymm5 vperm2i128 $49,%ymm0,%ymm4,%ymm4 vpunpcklqdq %ymm5,%ymm3,%ymm1 vpunpcklqdq %ymm4,%ymm2,%ymm0 vpunpckhqdq %ymm5,%ymm3,%ymm3 vpunpckhqdq %ymm4,%ymm2,%ymm2 vpminsd %ymm3,%ymm1,%ymm5 vpminsd %ymm2,%ymm0,%ymm4 vpmaxsd %ymm3,%ymm1,%ymm1 vpmaxsd %ymm2,%ymm0,%ymm0 vpunpckldq %ymm1,%ymm5,%ymm3 vpunpckldq %ymm0,%ymm4,%ymm2 vpunpckhdq %ymm1,%ymm5,%ymm5 vpunpckhdq %ymm0,%ymm4,%ymm4 vpunpcklqdq %ymm5,%ymm3,%ymm1 vpunpcklqdq %ymm4,%ymm2,%ymm0 vpunpckhqdq %ymm5,%ymm3,%ymm3 vpunpckhqdq %ymm4,%ymm2,%ymm2 mov %rcx,%rdx vpminsd %ymm3,%ymm1,%ymm4 vpmaxsd %ymm3,%ymm1,%ymm1 vpminsd %ymm2,%ymm0,%ymm3 vpmaxsd %ymm2,%ymm0,%ymm0 vpunpckldq %ymm1,%ymm4,%ymm5 vpunpckldq %ymm0,%ymm3,%ymm2 vpunpckhdq %ymm1,%ymm4,%ymm1 vpunpckhdq %ymm0,%ymm3,%ymm0 add $63,%rdx vmovdqu %ymm5,(%rax) vmovdqu %ymm1,32(%rax) vmovdqu %ymm2,64(%rax) vmovdqu %ymm0,96(%rax) add $32,%rcx sub $-128,%rax cmp %rdx,%r14 jg .L238 lea -32(%r14),%rax sub %r9,%rax lea 31(%r9),%rdx and $-32,%rax cmp %rdx,%r14 mov $0,%edx cmovle %rdx,%rax lea 32(%r9,%rax),%r9 lea 64(,%r9,4),%r12 mov %r9,%r11 lea (%r15,%r9,4),%r10 lea -32(%r12),%rbx vzeroupper .L235: lea -16(%r14),%rdx sub %r9,%rdx lea (%r15,%r12),%rsi mov %r10,%rdi call minmax_vector lea 15(%r9),%rax jmp .L237 .p2align 4,,10 .p2align 3 .L220: xor %edx,%edx call djbsort$avx2_2power lea -40(%rbp),%rsp pop %rbx pop %r12 pop %r13 pop %r14 pop %r15 pop %rbp ret .p2align 4,,10 .p2align 3 .L246: mov %rsi,%r8 mov %r13,%r11 mov %r13,%r9 jmp 
.L226 .p2align 4,,10 .p2align 3 .L247: mov %r10,%r9 jmp .L228 .p2align 4,,10 .p2align 3 .L266: vmovdqa .LC4(%rip),%ymm0 mov $16,%r12d lea 32(%rsp),%r13 vmovdqa %ymm0,64(%rsp) vzeroupper jmp .L224 .p2align 4,,10 .p2align 3 .L270: cmp $63,%r14 jle .L248 lea -64(%r14),%rcx shr $6,%rcx mov %rcx,%rdx sal $8,%rdx mov %r15,%rax lea 256(%r15,%rdx),%rdx .p2align 4,,10 .p2align 3 .L233: vmovdqu 128(%rax),%ymm0 vmovdqu (%rax),%ymm3 vmovdqu 32(%rax),%ymm15 vpminsd %ymm0,%ymm3,%ymm13 vpmaxsd %ymm0,%ymm3,%ymm3 vmovdqu 160(%rax),%ymm0 vmovdqu 224(%rax),%ymm2 vmovdqu 64(%rax),%ymm6 vmovdqu 96(%rax),%ymm5 vpminsd %ymm0,%ymm15,%ymm4 vpmaxsd %ymm0,%ymm15,%ymm15 vmovdqu 192(%rax),%ymm0 add $256,%rax vpminsd %ymm0,%ymm6,%ymm1 vpmaxsd %ymm0,%ymm6,%ymm6 vpminsd %ymm2,%ymm5,%ymm0 vpmaxsd %ymm2,%ymm5,%ymm5 vpminsd %ymm0,%ymm4,%ymm11 vpminsd %ymm1,%ymm13,%ymm14 vpmaxsd %ymm0,%ymm4,%ymm4 vpminsd %ymm5,%ymm15,%ymm12 vpminsd %ymm6,%ymm3,%ymm0 vpmaxsd %ymm5,%ymm15,%ymm15 vpmaxsd %ymm6,%ymm3,%ymm3 vpmaxsd %ymm1,%ymm13,%ymm13 vpminsd %ymm15,%ymm3,%ymm8 vpminsd %ymm4,%ymm13,%ymm1 vpminsd %ymm12,%ymm0,%ymm5 vpmaxsd %ymm4,%ymm13,%ymm13 vpminsd %ymm11,%ymm14,%ymm2 vpmaxsd %ymm12,%ymm0,%ymm12 vpmaxsd %ymm11,%ymm14,%ymm14 vpmaxsd %ymm15,%ymm3,%ymm3 vperm2i128 $32,%ymm14,%ymm2,%ymm11 vperm2i128 $32,%ymm13,%ymm1,%ymm10 vperm2i128 $32,%ymm12,%ymm5,%ymm9 vperm2i128 $49,%ymm12,%ymm5,%ymm0 vperm2i128 $32,%ymm3,%ymm8,%ymm4 vperm2i128 $49,%ymm14,%ymm2,%ymm2 vperm2i128 $49,%ymm13,%ymm1,%ymm1 vperm2i128 $49,%ymm3,%ymm8,%ymm3 vpminsd %ymm2,%ymm11,%ymm15 vpminsd %ymm1,%ymm10,%ymm14 vpmaxsd %ymm2,%ymm11,%ymm2 vpmaxsd %ymm1,%ymm10,%ymm1 vpminsd %ymm0,%ymm9,%ymm13 vpminsd %ymm3,%ymm4,%ymm12 vpmaxsd %ymm0,%ymm9,%ymm0 vpmaxsd %ymm3,%ymm4,%ymm8 vperm2i128 $49,%ymm2,%ymm15,%ymm11 vperm2i128 $49,%ymm1,%ymm14,%ymm10 vperm2i128 $49,%ymm0,%ymm13,%ymm9 vperm2i128 $32,%ymm2,%ymm15,%ymm7 vperm2i128 $32,%ymm1,%ymm14,%ymm6 vperm2i128 $32,%ymm0,%ymm13,%ymm5 vperm2i128 $32,%ymm8,%ymm12,%ymm4 vperm2i128 $49,%ymm8,%ymm12,%ymm8 vpunpcklqdq %ymm11,%ymm7,%ymm3 vpunpcklqdq %ymm10,%ymm6,%ymm2 vpunpcklqdq %ymm9,%ymm5,%ymm1 vpunpcklqdq %ymm8,%ymm4,%ymm0 vpunpckhqdq %ymm11,%ymm7,%ymm7 vpunpckhqdq %ymm10,%ymm6,%ymm6 vpunpckhqdq %ymm9,%ymm5,%ymm5 vpunpckhqdq %ymm8,%ymm4,%ymm4 vpminsd %ymm3,%ymm7,%ymm11 vpminsd %ymm2,%ymm6,%ymm10 vpminsd %ymm1,%ymm5,%ymm9 vpminsd %ymm0,%ymm4,%ymm8 vpmaxsd %ymm3,%ymm7,%ymm7 vpmaxsd %ymm2,%ymm6,%ymm6 vpmaxsd %ymm1,%ymm5,%ymm5 vpmaxsd %ymm0,%ymm4,%ymm4 vpunpckldq %ymm7,%ymm11,%ymm3 vpunpckldq %ymm6,%ymm10,%ymm2 vpunpckhdq %ymm7,%ymm11,%ymm7 vpunpckhdq %ymm6,%ymm10,%ymm6 vpunpckldq %ymm5,%ymm9,%ymm1 vpunpckldq %ymm4,%ymm8,%ymm0 vpunpckhdq %ymm5,%ymm9,%ymm5 vpunpckhdq %ymm4,%ymm8,%ymm4 vpunpcklqdq %ymm7,%ymm3,%ymm10 vpunpcklqdq %ymm5,%ymm1,%ymm8 vpunpckhqdq %ymm7,%ymm3,%ymm3 vpunpcklqdq %ymm6,%ymm2,%ymm9 vpunpcklqdq %ymm4,%ymm0,%ymm7 vpunpckhqdq %ymm6,%ymm2,%ymm2 vpunpckhqdq %ymm5,%ymm1,%ymm1 vpunpckhqdq %ymm4,%ymm0,%ymm0 vpminsd %ymm8,%ymm1,%ymm5 vpminsd %ymm9,%ymm2,%ymm6 vpminsd %ymm7,%ymm0,%ymm4 vpminsd %ymm10,%ymm3,%ymm11 vpmaxsd %ymm8,%ymm1,%ymm1 vpmaxsd %ymm7,%ymm0,%ymm0 vpmaxsd %ymm10,%ymm3,%ymm3 vpmaxsd %ymm9,%ymm2,%ymm2 vpunpckldq %ymm2,%ymm6,%ymm7 vpunpckldq %ymm3,%ymm11,%ymm8 vpunpckhdq %ymm2,%ymm6,%ymm2 vpunpckhdq %ymm3,%ymm11,%ymm3 vpunpckldq %ymm1,%ymm5,%ymm6 vpunpckhdq %ymm1,%ymm5,%ymm1 vpunpckldq %ymm0,%ymm4,%ymm5 vpunpckhdq %ymm0,%ymm4,%ymm0 vmovdqu %ymm8,-256(%rax) vmovdqu %ymm3,-224(%rax) vmovdqu %ymm7,-192(%rax) vmovdqu %ymm2,-160(%rax) vmovdqu %ymm6,-128(%rax) vmovdqu %ymm1,-96(%rax) vmovdqu %ymm5,-64(%rax) vmovdqu 
%ymm0,-32(%rax) cmp %rax,%rdx jne .L233 lea 1(%rcx),%rax mov %rax,%r9 sal $6,%r9 lea 128(,%r9,4),%rcx sal $8,%rax mov %r9,%r11 lea (%r15,%rax),%r10 lea -96(%rcx),%rbx lea -64(%rcx),%r12 lea 31(%r9),%r8 vzeroupper .L232: lea -32(%r14),%rdx sub %r9,%rdx lea (%r15,%rcx),%rsi mov %r10,%rdi call minmax_vector jmp .L236 .L272: mov (%rdi),%edx mov 4(%rdi),%esi mov 8(%rdi),%r9d mov 16(%r15),%r8d mov 12(%rdi),%edi mov 20(%r15),%r10d mov 24(%r15),%r11d jmp .L207 .L248: mov %r15,%r10 mov $64,%r12d mov $32,%ebx mov $31,%r8d mov $128,%ecx xor %r11d,%r11d xor %r9d,%r9d jmp .L232 .L276: mov (%rdi),%eax mov 4(%rdi),%ecx mov 8(%rdi),%esi jmp .L215 .L275: mov (%rdi),%edx mov 4(%rdi),%ecx mov 8(%rdi),%esi mov 12(%rdi),%edi jmp .L213 .L274: mov (%rdi),%ecx mov 4(%rdi),%esi mov 8(%rdi),%r9d mov 16(%r15),%r8d mov 12(%rdi),%edi jmp .L211 .L273: mov (%rdi),%edx mov 4(%rdi),%esi mov 8(%rdi),%r9d mov 16(%r15),%r8d mov 12(%rdi),%edi mov 20(%r15),%r10d jmp .L209 .endfn djbsort$avx2,globl .rodata.cst32 .LC0: .quad -1,0,-1,0 .LC1: .quad 0,-1,-1,0 .LC2: .quad -1,-1,0,0 .LC3: .quad -4294967296,4294967295,-4294967296,4294967295 .LC4: .quad 0x7fffffff7fffffff,0x7fffffff7fffffff .quad 0x7fffffff7fffffff,0x7fffffff7fffffff
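
//	Note on the .rodata constants above (interpretation inferred from
//	how they are consumed by the vpxor/vpminsd sequences earlier in
//	this file, not taken from upstream djbsort documentation):
//	.LC0 through .LC3 appear to be lane-complement masks; xoring the
//	selected 32-bit or 64-bit lanes with all-ones reverses their sort
//	direction, letting the bitonic merge steps alternate ascending
//	and descending runs while reusing a single min/max kernel.
//	.LC4 is INT32_MAX in every lane and appears to serve as padding
//	when djbsort$avx2 copies a small non-power-of-two input into a
//	scratch buffer before sorting it as a power-of-two block.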