cosmopolitan/libc/nexgen32e/djbsort-avx2.S

/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8                                 :vi│
╞══════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney                          │
│                                                                          │
│ This program is free software; you can redistribute it and/or modify     │
│ it under the terms of the GNU General Public License as published by     │
│ the Free Software Foundation; version 2 of the License.                  │
│                                                                          │
│ This program is distributed in the hope that it will be useful, but      │
│ WITHOUT ANY WARRANTY; without even the implied warranty of               │
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU         │
│ General Public License for more details.                                 │
│                                                                          │
│ You should have received a copy of the GNU General Public License        │
│ along with this program; if not, write to the Free Software              │
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA            │
│ 02110-1301 USA                                                           │
╚──────────────────────────────────────────────────────────────────────────*/
#include "libc/macros.h"
.source __FILE__
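
// AVX2 sorting kernels for 32-bit signed integers. This listing appears
// to be compiler-generated code for djbsort, D. J. Bernstein's vectorized
// sorting library (https://sorting.cr.yp.to/); the routine names
// (minmax_vector, int32_threestages, merge16_finish) follow the upstream
// C source. The C sketches in comments after each routine are an
// editor's annotations, not upstream documentation.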
.p2align 4
minmax_vector:
cmp $7,%rdx
jle .L27
test $7,%dl
je .L5
lea -32(,%rdx,4),%rax
lea (%rdi,%rax),%rcx
add %rsi,%rax
vmovdqu (%rax),%ymm0
vmovdqu (%rcx),%ymm1
and $-8,%rdx
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm2,(%rcx)
vmovdqu %ymm0,(%rax)
.p2align 4,,10
.p2align 3
.L5: vmovdqu (%rsi),%ymm1
vmovdqu (%rdi),%ymm0
add $32,%rsi
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm2,(%rdi)
vmovdqu %ymm0,-32(%rsi)
add $32,%rdi
sub $8,%rdx
jne .L5
vzeroupper
.L25: ret
.p2align 4,,10
.p2align 3
.L27: test %rdx,%rdx
jle .L25
# scalar tail for 1 <= n <= 7: one compare/exchange per element via cmov
mov (%rdi),%eax
cmp (%rsi),%eax
mov %eax,%ecx
cmovg (%rsi),%ecx
cmovle (%rsi),%eax
mov %ecx,(%rdi)
mov %eax,(%rsi)
cmp $1,%rdx
je .L25
mov 4(%rdi),%eax
cmp 4(%rsi),%eax
mov %eax,%ecx
cmovg 4(%rsi),%ecx
cmovle 4(%rsi),%eax
mov %ecx,4(%rdi)
mov %eax,4(%rsi)
cmp $2,%rdx
je .L25
mov 8(%rdi),%eax
cmp 8(%rsi),%eax
mov %eax,%ecx
cmovg 8(%rsi),%ecx
cmovle 8(%rsi),%eax
mov %ecx,8(%rdi)
mov %eax,8(%rsi)
cmp $3,%rdx
je .L25
mov 12(%rdi),%eax
cmp 12(%rsi),%eax
mov %eax,%ecx
cmovg 12(%rsi),%ecx
cmovle 12(%rsi),%eax
mov %ecx,12(%rdi)
mov %eax,12(%rsi)
cmp $4,%rdx
je .L25
mov 16(%rdi),%eax
cmp 16(%rsi),%eax
mov %eax,%ecx
cmovg 16(%rsi),%ecx
cmovle 16(%rsi),%eax
mov %ecx,16(%rdi)
mov %eax,16(%rsi)
cmp $5,%rdx
je .L25
mov 20(%rdi),%eax
cmp 20(%rsi),%eax
mov %eax,%ecx
cmovg 20(%rsi),%ecx
cmovle 20(%rsi),%eax
mov %ecx,20(%rdi)
mov %eax,20(%rsi)
cmp $7,%rdx
jne .L25
mov 24(%rdi),%eax
cmp 24(%rsi),%eax
mov %eax,%edx
cmovg 24(%rsi),%edx
cmovle 24(%rsi),%eax
mov %edx,24(%rdi)
mov %eax,24(%rsi)
ret
.endfn minmax_vector,globl
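
// What minmax_vector appears to compute (editor's sketch in C, not the
// upstream djbsort source; names are illustrative): a pointwise
// compare/exchange of two arrays, eight lanes per vpminsd/vpmaxsd step,
// with one overlapping vector step covering the n%8 tail and scalar
// cmovs for n < 8. Re-running minmax on an overlapped pair is
// idempotent, which is why the overlap trick is safe.
//
//	#include <stdint.h>
//	static void minmax_vector_ref(int32_t *x, int32_t *y, long long n) {
//	  for (long long i = 0; i < n; ++i) {
//	    int32_t a = x[i], b = y[i];
//	    x[i] = a < b ? a : b;          // like one vpminsd lane
//	    y[i] = a < b ? b : a;          // like one vpmaxsd lane
//	  }
//	}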
.p2align 4
int32_twostages_32:
test %rsi,%rsi
jle .L33
lea -128(%rsi),%rax
dec %rsi
and $-128,%rsi
mov %rax,%rdx
sub %rsi,%rdx
jmp .L30
.p2align 4,,10
.p2align 3
.L34: add $-128,%rax
.L30: vmovdqu 256(%rdi),%ymm1
vmovdqu (%rdi),%ymm0
vmovdqu 384(%rdi),%ymm4
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu 128(%rdi),%ymm1
add $512,%rdi
vpminsd %ymm4,%ymm1,%ymm3
vpmaxsd %ymm4,%ymm1,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm0,-128(%rdi)
vmovdqu -224(%rdi),%ymm1
vmovdqu -480(%rdi),%ymm0
vmovdqu %ymm4,-512(%rdi)
vmovdqu %ymm2,-384(%rdi)
vmovdqu -96(%rdi),%ymm4
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu -352(%rdi),%ymm1
vmovdqu %ymm3,-256(%rdi)
vpminsd %ymm4,%ymm1,%ymm3
vpmaxsd %ymm4,%ymm1,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm0,-96(%rdi)
vmovdqu -192(%rdi),%ymm1
vmovdqu -448(%rdi),%ymm0
vmovdqu %ymm4,-480(%rdi)
vmovdqu %ymm2,-352(%rdi)
vmovdqu -64(%rdi),%ymm4
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu -320(%rdi),%ymm1
vmovdqu %ymm3,-224(%rdi)
vpminsd %ymm4,%ymm1,%ymm3
vpmaxsd %ymm4,%ymm1,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm0,-64(%rdi)
vmovdqu -160(%rdi),%ymm1
vmovdqu -416(%rdi),%ymm0
vmovdqu %ymm4,-448(%rdi)
vmovdqu %ymm2,-320(%rdi)
vmovdqu -32(%rdi),%ymm4
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu -288(%rdi),%ymm1
vmovdqu %ymm3,-192(%rdi)
vpminsd %ymm4,%ymm1,%ymm3
vpmaxsd %ymm4,%ymm1,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,-416(%rdi)
vmovdqu %ymm2,-288(%rdi)
vmovdqu %ymm3,-160(%rdi)
vmovdqu %ymm0,-32(%rdi)
cmp %rdx,%rax
jne .L34
vzeroupper
.L33: ret
.endfn int32_twostages_32,globl
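
// Editor's sketch of int32_twostages_32 in C (illustrative names, not the
// upstream source): for every full block of 128 ints it fuses two merge
// stages, compare-exchanging at distance 64 and then distance 32, eight
// lanes per step in the assembly above.
//
//	#include <stdint.h>
//	static void minmax(int32_t *a, int32_t *b) {
//	  int32_t lo = *a < *b ? *a : *b, hi = *a < *b ? *b : *a;
//	  *a = lo; *b = hi;
//	}
//	static void twostages_32_ref(int32_t *x, long long n) {
//	  for (long long j = 0; j + 128 <= n; j += 128)
//	    for (long long i = j; i < j + 32; ++i) {
//	      minmax(&x[i], &x[i + 64]);        // first stage, distance 64
//	      minmax(&x[i + 32], &x[i + 96]);
//	      minmax(&x[i], &x[i + 32]);        // second stage, distance 32
//	      minmax(&x[i + 64], &x[i + 96]);
//	    }
//	}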
.p2align 4
int32_threestages:
push %rbp
mov %rsp,%rbp
push %r15
push %r14
lea 0(,%rdx,8),%r14
push %r13
push %r12
push %rbx
and $-32,%rsp
sub $32,%rsp
mov %rsi,16(%rsp)
cmp %r14,%rsi
jl .L41
lea -1(%rdx),%rax
and $-8,%rax
lea (%rdx,%rdx),%r8
mov %rax,8(%rsp)
lea (%r8,%rdx),%rcx
lea 0(,%rdx,4),%rsi
mov %r14,%r9
mov %rdi,%r13
lea (%rsi,%rdx),%r11
lea (%rcx,%rcx),%r10
sub %rdx,%r9
xor %r12d,%r12d
mov %r14,%rbx
lea 32(%rdi),%r15
.p2align 4,,10
.p2align 3
.L37: mov %r12,%rdi
lea (%rdx,%rdi),%rax
mov %rbx,24(%rsp)
mov %rbx,%r12
cmp %rax,%rdi
jge .L40
lea 0(%r13,%rdi,4),%rax
add 8(%rsp),%rdi
lea (%r15,%rdi,4),%rdi
.p2align 4,,10
.p2align 3
.L38: vmovdqu (%rax,%rsi,4),%ymm0
vmovdqu (%rax),%ymm6
vmovdqu (%rax,%rdx,4),%ymm1
vpminsd %ymm0,%ymm6,%ymm7
vpmaxsd %ymm0,%ymm6,%ymm6
vmovdqu (%rax,%r11,4),%ymm0
vmovdqu (%rax,%r9,4),%ymm8
vpmaxsd %ymm0,%ymm1,%ymm3
vpminsd %ymm0,%ymm1,%ymm2
vmovdqu (%rax,%r10,4),%ymm1
vmovdqu (%rax,%r8,4),%ymm0
vpminsd %ymm1,%ymm0,%ymm4
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu (%rax,%rcx,4),%ymm1
vpminsd %ymm8,%ymm1,%ymm5
vpmaxsd %ymm8,%ymm1,%ymm1
vpminsd %ymm4,%ymm7,%ymm8
vpmaxsd %ymm4,%ymm7,%ymm4
vpminsd %ymm5,%ymm2,%ymm7
vpmaxsd %ymm5,%ymm2,%ymm2
vpminsd %ymm0,%ymm6,%ymm5
vpmaxsd %ymm0,%ymm6,%ymm0
vpminsd %ymm1,%ymm3,%ymm6
vpmaxsd %ymm1,%ymm3,%ymm1
vpminsd %ymm7,%ymm8,%ymm9
vpmaxsd %ymm7,%ymm8,%ymm3
vpminsd %ymm2,%ymm4,%ymm8
vpminsd %ymm6,%ymm5,%ymm7
vpmaxsd %ymm2,%ymm4,%ymm2
vpmaxsd %ymm6,%ymm5,%ymm5
vpminsd %ymm1,%ymm0,%ymm4
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm9,(%rax)
vmovdqu %ymm3,(%rax,%rdx,4)
vmovdqu %ymm8,(%rax,%r8,4)
vmovdqu %ymm2,(%rax,%rcx,4)
vmovdqu %ymm7,(%rax,%rsi,4)
vmovdqu %ymm5,(%rax,%r11,4)
vmovdqu %ymm4,(%rax,%r10,4)
vmovdqu %ymm0,(%rax,%r9,4)
add $32,%rax
cmp %rax,%rdi
jne .L38
.L40: add %r14,%rbx
cmp %rbx,16(%rsp)
jge .L37
vzeroupper
.L35: mov 24(%rsp),%rax
lea -40(%rbp),%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.L41: movq $0,24(%rsp)
jmp .L35
.endfn int32_threestages,globl
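
// Editor's sketch of int32_threestages in C (illustrative names): three
// merge stages at distances 4q, 2q, q fused into one pass over groups of
// 8q elements; the return value (%rax) is how many elements were fully
// processed, a multiple of 8q (0 when n < 8q). minmax() as in the sketch
// after int32_twostages_32.
//
//	#include <stdint.h>
//	static long long threestages_ref(int32_t *x, long long n, long long q) {
//	  long long k, i;
//	  for (k = 0; k + 8 * q <= n; k += 8 * q)
//	    for (i = k; i < k + q; ++i) {          // asm: eight lanes per step
//	      minmax(&x[i], &x[i + 4 * q]);        // distance 4q
//	      minmax(&x[i + q], &x[i + 5 * q]);
//	      minmax(&x[i + 2 * q], &x[i + 6 * q]);
//	      minmax(&x[i + 3 * q], &x[i + 7 * q]);
//	      minmax(&x[i], &x[i + 2 * q]);        // distance 2q
//	      minmax(&x[i + q], &x[i + 3 * q]);
//	      minmax(&x[i + 4 * q], &x[i + 6 * q]);
//	      minmax(&x[i + 5 * q], &x[i + 7 * q]);
//	      minmax(&x[i], &x[i + q]);            // distance q
//	      minmax(&x[i + 2 * q], &x[i + 3 * q]);
//	      minmax(&x[i + 4 * q], &x[i + 5 * q]);
//	      minmax(&x[i + 6 * q], &x[i + 7 * q]);
//	    }
//	  return k;
//	}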
.p2align 4
merge16_finish:
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm3,%ymm2
vperm2i128 $49,%ymm0,%ymm3,%ymm0
vpminsd %ymm0,%ymm2,%ymm1
vpmaxsd %ymm0,%ymm2,%ymm0
vpunpcklqdq %ymm0,%ymm1,%ymm2
vpunpckhqdq %ymm0,%ymm1,%ymm0
vpminsd %ymm0,%ymm2,%ymm1
vpmaxsd %ymm0,%ymm2,%ymm2
vpunpckldq %ymm2,%ymm1,%ymm0
vpunpckhdq %ymm2,%ymm1,%ymm1
vpunpcklqdq %ymm1,%ymm0,%ymm3
vpunpckhqdq %ymm1,%ymm0,%ymm0
vpminsd %ymm3,%ymm0,%ymm2
vpmaxsd %ymm3,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm2,%ymm1
vpunpckhdq %ymm0,%ymm2,%ymm0
vperm2i128 $32,%ymm0,%ymm1,%ymm2
vperm2i128 $49,%ymm0,%ymm1,%ymm0
test %esi,%esi
je .L46
vpcmpeqd %ymm1,%ymm1,%ymm1
vpxor %ymm1,%ymm2,%ymm2
vpxor %ymm1,%ymm0,%ymm0
.L46: vmovdqu %ymm2,(%rdi)
vmovdqu %ymm0,32(%rdi)
ret
.endfn merge16_finish,globl
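
// Editor's sketch of merge16_finish (illustrative; the real interface
// passes the 16 values in %ymm0:%ymm1). The input is a bitonic sequence;
// four minmax rounds at distances 8, 4, 2, 1 complete the sort, and the
// vperm2i128/vpunpck steps above only reshuffle lanes between rounds. A
// set flag in %esi stores the bitwise complement, i.e. the
// order-reversed image for the caller's merge direction. minmax() as in
// the sketch after int32_twostages_32.
//
//	#include <stdint.h>
//	static void merge16_finish_ref(int32_t *x, int32_t v[16], int flagdown) {
//	  for (int d = 8; d >= 1; d /= 2)          // bitonic merge rounds
//	    for (int i = 0; i < 16; ++i)
//	      if ((i & d) == 0)
//	        minmax(&v[i], &v[i + d]);
//	  for (int i = 0; i < 16; ++i)
//	    x[i] = flagdown ? ~v[i] : v[i];        // vpxor with all-ones
//	}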
.p2align 4
djbsort$avx2_2power:
push %r13
mov %rdi,%r11
lea 16(%rsp),%r13
and $-32,%rsp
push -8(%r13)
push %rbp
mov %rsp,%rbp
push %r15
push %r14
push %r13
push %r12
push %rbx
sub $200,%rsp
mov %rsi,-144(%rbp)
mov %edx,-164(%rbp)
cmp $8,%rsi
je .L194
cmpq $16,-144(%rbp)
je .L195
cmpq $32,-144(%rbp)
je .L196
mov %rsi,%r15
sar $3,%r15
test %r15,%r15
jle .L197
lea -1(%r15),%rbx
mov %rbx,-200(%rbp)
shr $3,%rbx
mov %rbx,%rdx
lea 32(%r11),%rbx
lea (%r15,%r15),%r8
mov %rbx,-120(%rbp)
lea 0(,%r15,4),%rsi
lea (%r8,%r15),%rdi
lea 0(,%r15,8),%rcx
sal $5,%rdx
lea (%rdi,%rdi),%r10
lea (%rsi,%r15),%r9
sub %r15,%rcx
mov %r11,%rax
add %rbx,%rdx
.L61: vmovdqu (%rax),%ymm0
vmovdqu (%rax,%rsi,4),%ymm2
vmovdqu (%rax,%r10,4),%ymm3
vpminsd %ymm2,%ymm0,%ymm4
vpmaxsd %ymm2,%ymm0,%ymm2
vmovdqu (%rax,%r8,4),%ymm0
vpminsd %ymm3,%ymm0,%ymm1
vpmaxsd %ymm3,%ymm0,%ymm0
vpminsd %ymm2,%ymm0,%ymm3
vpmaxsd %ymm2,%ymm0,%ymm0
vpminsd %ymm4,%ymm1,%ymm2
vpmaxsd %ymm4,%ymm1,%ymm1
vpminsd %ymm1,%ymm3,%ymm4
vpmaxsd %ymm1,%ymm3,%ymm1
vmovdqu %ymm0,(%rax)
vmovdqu %ymm4,(%rax,%r8,4)
vmovdqu %ymm1,(%rax,%rsi,4)
vmovdqu %ymm2,(%rax,%r10,4)
vmovdqu (%rax,%r15,4),%ymm2
vmovdqu (%rax,%r9,4),%ymm0
vmovdqu (%rax,%rdi,4),%ymm4
vpminsd %ymm2,%ymm0,%ymm1
vpmaxsd %ymm2,%ymm0,%ymm0
vmovdqu (%rax,%rcx,4),%ymm2
vpminsd %ymm4,%ymm2,%ymm3
vpmaxsd %ymm4,%ymm2,%ymm2
vpminsd %ymm3,%ymm1,%ymm4
vpmaxsd %ymm3,%ymm1,%ymm1
vpminsd %ymm2,%ymm0,%ymm3
vpmaxsd %ymm2,%ymm0,%ymm0
vpminsd %ymm1,%ymm3,%ymm2
vpmaxsd %ymm1,%ymm3,%ymm1
vmovdqu %ymm4,(%rax,%r15,4)
vmovdqu %ymm1,(%rax,%rdi,4)
vmovdqu %ymm2,(%rax,%r9,4)
vmovdqu %ymm0,(%rax,%rcx,4)
add $32,%rax
cmp %rdx,%rax
jne .L61
.L62: lea 0(,%r15,8),%rax
sub %r15,%rax
lea (%r15,%r15),%r12
mov %rax,%r9
mov -144(%rbp),%rax
lea 0(,%r15,4),%rbx
lea (%r12,%r15),%r13
lea (%rbx,%r15),%r10
lea (%r13,%r13),%r14
cmp $127,%rax
jg .L59
lea 64(%r11),%rdi
dec %rax
mov %rdi,-192(%rbp)
mov %rax,-176(%rbp)
.L60: mov -144(%rbp),%rdi
mov %r11,-208(%rbp)
lea (%r11,%rdi,4),%rax
mov %rax,-112(%rbp)
mov %rdi,%rax
sar $4,%rax
cmp $32,%rax
sete %dl
cmp $127,%rax
mov %rax,-80(%rbp)
setg %al
or %eax,%edx
mov -176(%rbp),%rax
mov %dl,-152(%rbp)
shr $4,%rax
sal $6,%rax
add -192(%rbp),%rax
mov %rax,-128(%rbp)
mov -200(%rbp),%rax
movl $3,-184(%rbp)
shr $3,%rax
sal $5,%rax
add -120(%rbp),%rax
mov %rax,-160(%rbp)
movq $4,-136(%rbp)
mov %r12,-200(%rbp)
mov %r13,-216(%rbp)
mov %r10,-224(%rbp)
mov %r9,-232(%rbp)
vmovdqa .LC1(%rip),%ymm11
vmovdqa .LC3(%rip),%ymm10
vmovdqa .LC2(%rip),%ymm12
mov %rbx,-192(%rbp)
mov %rdi,%rbx
.L63: cmpq $4,-136(%rbp)
je .L198
cmpq $2,-136(%rbp)
je .L91
mov -112(%rbp),%rdx
mov %r11,%rax
cmp -112(%rbp),%r11
je .L90
.L92: vpxor 32(%rax),%ymm10,%ymm2
vpxor (%rax),%ymm10,%ymm1
add $64,%rax
vperm2i128 $32,%ymm2,%ymm1,%ymm0
vperm2i128 $49,%ymm2,%ymm1,%ymm1
vpunpcklqdq %ymm1,%ymm0,%ymm2
vpunpckhqdq %ymm1,%ymm0,%ymm0
vpminsd %ymm0,%ymm2,%ymm1
vpmaxsd %ymm0,%ymm2,%ymm2
vpunpcklqdq %ymm2,%ymm1,%ymm0
vpunpckhqdq %ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm2,%ymm1
vperm2i128 $49,%ymm0,%ymm2,%ymm0
vmovdqu %ymm1,-64(%rax)
vmovdqu %ymm0,-32(%rax)
cmp %rax,%rdx
jne .L92
.L90: cmpb $0,-152(%rbp)
mov -80(%rbp),%r12
je .L89
mov %rbx,%r13
mov %r11,%rbx
.p2align 4,,10
.p2align 3
.L146: mov %r12,%rdx
sar $2,%rdx
mov %r13,%rsi
mov %rbx,%rdi
vzeroupper
sar $3,%r12
call int32_threestages
cmp $127,%r12
vmovdqa .LC1(%rip),%ymm11
vmovdqa .LC3(%rip),%ymm10
vmovdqa .LC2(%rip),%ymm12
jg .L146
cmp $32,%r12
je .L146
mov %rbx,%r11
mov %r13,%rbx
.L89: cmp $15,%r12
jle .L94
mov -120(%rbp),%r13
.p2align 4,,10
.p2align 3
.L100: mov %r12,%rdx
sar %rdx
test %rbx,%rbx
jle .L95
lea (%rdx,%rdx),%rcx
lea -1(%rdx),%r9
lea (%rcx,%rdx),%rsi
lea 0(,%rdx,4),%r10
xor %r8d,%r8d
and $-8,%r9
.p2align 4,,10
.p2align 3
.L96: lea (%rdx,%r8),%rax
cmp %rax,%r8
jge .L99
lea (%r9,%r8),%rdi
lea (%r11,%r8,4),%rax
lea 0(%r13,%rdi,4),%rdi
.p2align 4,,10
.p2align 3
.L97: vmovdqu (%rax,%rcx,4),%ymm1
vmovdqu (%rax),%ymm0
vmovdqu (%rax,%rsi,4),%ymm4
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu (%rax,%rdx,4),%ymm1
vpminsd %ymm4,%ymm1,%ymm3
vpmaxsd %ymm4,%ymm1,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,(%rax)
vmovdqu %ymm2,(%rax,%rdx,4)
vmovdqu %ymm3,(%rax,%rcx,4)
vmovdqu %ymm0,(%rax,%rsi,4)
add $32,%rax
cmp %rdi,%rax
jne .L97
.L99: add %r10,%r8
cmp %r8,%rbx
jg .L96
.L95: sar $2,%r12
cmp $15,%r12
jg .L100
.L94: cmp $8,%r12
je .L101
.L104: mov %r11,%rax
test %r15,%r15
jle .L103
mov -160(%rbp),%r9
mov -192(%rbp),%rdx
mov -200(%rbp),%rcx
mov -216(%rbp),%rsi
mov -224(%rbp),%rdi
mov -232(%rbp),%r8
.p2align 4,,10
.p2align 3
.L102: vmovdqu (%rax,%r15,4),%ymm0
vmovdqu (%rax),%ymm1
vmovdqu (%rax,%rcx,4),%ymm2
vmovdqu (%rax,%rdi,4),%ymm4
vmovdqu (%rax,%rdx,4),%ymm7
vpminsd %ymm0,%ymm1,%ymm5
vpmaxsd %ymm0,%ymm1,%ymm1
vmovdqu (%rax,%rsi,4),%ymm0
vmovdqu (%rax,%r14,4),%ymm8
vpminsd %ymm0,%ymm2,%ymm3
vpmaxsd %ymm0,%ymm2,%ymm0
vpminsd %ymm4,%ymm7,%ymm2
vpmaxsd %ymm4,%ymm7,%ymm7
vmovdqu (%rax,%r8,4),%ymm4
vpminsd %ymm3,%ymm5,%ymm9
vpminsd %ymm4,%ymm8,%ymm6
vpmaxsd %ymm4,%ymm8,%ymm4
vpmaxsd %ymm3,%ymm5,%ymm5
vpminsd %ymm0,%ymm1,%ymm8
vpminsd %ymm6,%ymm2,%ymm3
vpmaxsd %ymm0,%ymm1,%ymm0
vpmaxsd %ymm6,%ymm2,%ymm1
vpminsd %ymm4,%ymm7,%ymm2
vpmaxsd %ymm4,%ymm7,%ymm4
vpminsd %ymm3,%ymm9,%ymm6
vpminsd %ymm2,%ymm8,%ymm7
vpmaxsd %ymm3,%ymm9,%ymm3
vpmaxsd %ymm2,%ymm8,%ymm2
vpminsd %ymm1,%ymm5,%ymm8
vpmaxsd %ymm1,%ymm5,%ymm1
vpminsd %ymm4,%ymm0,%ymm5
vpmaxsd %ymm4,%ymm0,%ymm0
vmovdqu %ymm6,(%rax)
vmovdqu %ymm7,(%rax,%r15,4)
vmovdqu %ymm8,(%rax,%rcx,4)
vmovdqu %ymm5,(%rax,%rsi,4)
vmovdqu %ymm3,(%rax,%rdx,4)
vmovdqu %ymm2,(%rax,%rdi,4)
vmovdqu %ymm1,(%rax,%r14,4)
vmovdqu %ymm0,(%rax,%r8,4)
add $32,%rax
cmp %rax,%r9
jne .L102
.L103: sarq -136(%rbp)
decl -184(%rbp)
jne .L63
cmpq $0,-144(%rbp)
jle .L113
mov -176(%rbp),%rax
vpcmpeqd %ymm4,%ymm4,%ymm4
shr $6,%rax
sal $8,%rax
lea 256(%r11,%rax),%rdx
mov %r11,%rax
jmp .L112
.L199: vpxor %ymm4,%ymm7,%ymm7
vpxor %ymm4,%ymm2,%ymm2
vpxor %ymm4,%ymm1,%ymm1
vpxor %ymm4,%ymm0,%ymm0
.L111: vperm2i128 $32,%ymm5,%ymm9,%ymm11
vperm2i128 $32,%ymm6,%ymm10,%ymm3
vperm2i128 $32,%ymm1,%ymm7,%ymm12
vperm2i128 $32,%ymm0,%ymm2,%ymm8
vperm2i128 $49,%ymm6,%ymm10,%ymm6
vperm2i128 $49,%ymm5,%ymm9,%ymm9
vperm2i128 $49,%ymm1,%ymm7,%ymm1
vperm2i128 $49,%ymm0,%ymm2,%ymm0
vpminsd %ymm3,%ymm12,%ymm7
vpmaxsd %ymm11,%ymm8,%ymm2
vpminsd %ymm9,%ymm0,%ymm10
vpminsd %ymm6,%ymm1,%ymm5
vpmaxsd %ymm9,%ymm0,%ymm0
vpmaxsd %ymm3,%ymm12,%ymm3
vpmaxsd %ymm6,%ymm1,%ymm1
vpminsd %ymm11,%ymm8,%ymm12
vpminsd %ymm12,%ymm7,%ymm9
vpmaxsd %ymm12,%ymm7,%ymm6
vpminsd %ymm10,%ymm5,%ymm8
vpminsd %ymm2,%ymm3,%ymm7
vpmaxsd %ymm10,%ymm5,%ymm5
vpmaxsd %ymm2,%ymm3,%ymm3
vpminsd %ymm0,%ymm1,%ymm2
vpmaxsd %ymm0,%ymm1,%ymm1
vpminsd %ymm8,%ymm9,%ymm10
vpmaxsd %ymm5,%ymm6,%ymm0
vpmaxsd %ymm8,%ymm9,%ymm8
vpminsd %ymm2,%ymm7,%ymm9
vpmaxsd %ymm2,%ymm7,%ymm7
vpminsd %ymm5,%ymm6,%ymm2
vpminsd %ymm1,%ymm3,%ymm5
vpmaxsd %ymm1,%ymm3,%ymm3
vpunpckldq %ymm9,%ymm10,%ymm11
vpunpckhdq %ymm9,%ymm10,%ymm6
vpunpckldq %ymm7,%ymm8,%ymm1
vpunpckldq %ymm5,%ymm2,%ymm9
vpunpckldq %ymm3,%ymm0,%ymm10
vpunpckhdq %ymm5,%ymm2,%ymm2
vpunpckhdq %ymm3,%ymm0,%ymm0
vpunpckhdq %ymm7,%ymm8,%ymm5
vpunpcklqdq %ymm9,%ymm11,%ymm3
vpunpcklqdq %ymm2,%ymm6,%ymm8
vpunpckhqdq %ymm9,%ymm11,%ymm7
vpunpckhqdq %ymm2,%ymm6,%ymm6
vpunpcklqdq %ymm0,%ymm5,%ymm9
vpunpcklqdq %ymm10,%ymm1,%ymm2
vpunpckhqdq %ymm0,%ymm5,%ymm0
vpunpckhqdq %ymm10,%ymm1,%ymm1
vperm2i128 $32,%ymm2,%ymm3,%ymm12
vperm2i128 $32,%ymm1,%ymm7,%ymm11
vperm2i128 $32,%ymm0,%ymm6,%ymm5
vperm2i128 $49,%ymm2,%ymm3,%ymm3
vperm2i128 $32,%ymm9,%ymm8,%ymm10
vperm2i128 $49,%ymm1,%ymm7,%ymm2
vperm2i128 $49,%ymm0,%ymm6,%ymm0
vperm2i128 $49,%ymm9,%ymm8,%ymm1
vmovdqu %ymm12,(%rax)
vmovdqu %ymm11,32(%rax)
vmovdqu %ymm10,64(%rax)
vmovdqu %ymm5,96(%rax)
vmovdqu %ymm3,128(%rax)
vmovdqu %ymm2,160(%rax)
vmovdqu %ymm1,192(%rax)
vmovdqu %ymm0,224(%rax)
add $256,%rax
cmp %rdx,%rax
je .L113
.L112: vmovdqu 32(%rax),%ymm0
vmovdqu (%rax),%ymm2
vmovdqu 128(%rax),%ymm3
vpunpckhdq %ymm0,%ymm2,%ymm5
vpunpckldq %ymm0,%ymm2,%ymm7
vmovdqu 96(%rax),%ymm0
vmovdqu 64(%rax),%ymm2
vmovdqu 224(%rax),%ymm9
vpunpckldq %ymm0,%ymm2,%ymm6
vpunpckhdq %ymm0,%ymm2,%ymm2
vmovdqu 160(%rax),%ymm0
mov -164(%rbp),%ebx
vpunpckldq %ymm0,%ymm3,%ymm1
vpunpckhdq %ymm0,%ymm3,%ymm0
vmovdqu 192(%rax),%ymm3
vpunpcklqdq %ymm6,%ymm7,%ymm10
vpunpckldq %ymm9,%ymm3,%ymm8
vpunpckhdq %ymm9,%ymm3,%ymm3
vpunpckhqdq %ymm6,%ymm7,%ymm7
vpunpcklqdq %ymm2,%ymm5,%ymm9
vpunpcklqdq %ymm8,%ymm1,%ymm6
vpunpckhqdq %ymm2,%ymm5,%ymm2
vpunpckhqdq %ymm8,%ymm1,%ymm1
vpunpcklqdq %ymm3,%ymm0,%ymm5
vpunpckhqdq %ymm3,%ymm0,%ymm0
test %ebx,%ebx
jne .L199
vpxor %ymm4,%ymm10,%ymm10
vpxor %ymm4,%ymm9,%ymm9
vpxor %ymm4,%ymm6,%ymm6
vpxor %ymm4,%ymm5,%ymm5
jmp .L111
.L91: mov -112(%rbp),%rdx
cmp %rdx,%r11
je .L90
mov %r11,%rax
.L93: vpxor 32(%rax),%ymm11,%ymm2
vpxor (%rax),%ymm11,%ymm1
add $64,%rax
vperm2i128 $32,%ymm2,%ymm1,%ymm0
vperm2i128 $49,%ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm2,%ymm1
vperm2i128 $49,%ymm0,%ymm2,%ymm0
vmovdqu %ymm1,-64(%rax)
vmovdqu %ymm0,-32(%rax)
cmp %rax,%rdx
jne .L93
jmp .L90
.L101: test %rbx,%rbx
jle .L104
mov %r11,%rax
.L105: vmovdqu 32(%rax),%ymm1
vmovdqu (%rax),%ymm0
add $64,%rax
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm2,-64(%rax)
vmovdqu %ymm0,-32(%rax)
cmp %rax,-128(%rbp)
jne .L105
jmp .L104
.L198: mov %r11,%rax
cmp -112(%rbp),%r11
je .L90
.L87: vpxor 32(%rax),%ymm12,%ymm0
vpxor (%rax),%ymm12,%ymm1
vmovdqu %ymm0,32(%rax)
vmovdqu %ymm1,(%rax)
add $64,%rax
cmp %rax,-112(%rbp)
jne .L87
jmp .L90
.L113: cmpb $0,-152(%rbp)
mov -80(%rbp),%r13
je .L109
mov %r15,-112(%rbp)
mov -120(%rbp),%r15
.L145: mov -80(%rbp),%rdx
sar $2,%rdx
cmpq $0,-144(%rbp)
jle .L114
lea (%rdx,%rdx),%rdi
lea 0(,%rdx,8),%r14
lea (%rdi,%rdx),%rcx
lea 0(,%rdx,4),%rsi
mov %r14,%r8
lea -1(%rdx),%r13
lea (%rsi,%rdx),%r10
lea (%rcx,%rcx),%r9
sub %rdx,%r8
xor %r12d,%r12d
and $-8,%r13
.p2align 4,,10
.p2align 3
.L115: lea (%rdx,%r12),%rax
cmp %rax,%r12
jge .L118
lea 0(%r13,%r12),%rbx
lea (%r11,%r12,4),%rax
lea (%r15,%rbx,4),%rbx
.p2align 4,,10
.p2align 3
.L116: vmovdqu (%rax,%rsi,4),%ymm0
vmovdqu (%rax),%ymm6
vmovdqu (%rax,%rdx,4),%ymm1
vpminsd %ymm0,%ymm6,%ymm7
vpmaxsd %ymm0,%ymm6,%ymm6
vmovdqu (%rax,%r10,4),%ymm0
vmovdqu (%rax,%r8,4),%ymm8
vpmaxsd %ymm0,%ymm1,%ymm3
vpminsd %ymm0,%ymm1,%ymm2
vmovdqu (%rax,%r9,4),%ymm1
vmovdqu (%rax,%rdi,4),%ymm0
vpminsd %ymm1,%ymm0,%ymm4
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu (%rax,%rcx,4),%ymm1
vpminsd %ymm8,%ymm1,%ymm5
vpmaxsd %ymm8,%ymm1,%ymm1
vpminsd %ymm4,%ymm7,%ymm8
vpmaxsd %ymm4,%ymm7,%ymm4
vpminsd %ymm5,%ymm2,%ymm7
vpmaxsd %ymm5,%ymm2,%ymm2
vpminsd %ymm0,%ymm6,%ymm5
vpmaxsd %ymm0,%ymm6,%ymm0
vpminsd %ymm1,%ymm3,%ymm6
vpmaxsd %ymm1,%ymm3,%ymm1
vpminsd %ymm7,%ymm8,%ymm9
vpmaxsd %ymm7,%ymm8,%ymm3
vpminsd %ymm2,%ymm4,%ymm8
vpminsd %ymm6,%ymm5,%ymm7
vpmaxsd %ymm2,%ymm4,%ymm2
vpmaxsd %ymm6,%ymm5,%ymm5
vpminsd %ymm1,%ymm0,%ymm4
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm9,(%rax)
vmovdqu %ymm3,(%rax,%rdx,4)
vmovdqu %ymm8,(%rax,%rdi,4)
vmovdqu %ymm2,(%rax,%rcx,4)
vmovdqu %ymm7,(%rax,%rsi,4)
vmovdqu %ymm5,(%rax,%r10,4)
vmovdqu %ymm4,(%rax,%r9,4)
vmovdqu %ymm0,(%rax,%r8,4)
add $32,%rax
cmp %rbx,%rax
jne .L116
.L118: add %r14,%r12
cmp %r12,-144(%rbp)
jg .L115
.L114: sarq $3,-80(%rbp)
mov -80(%rbp),%rax
cmp $127,%rax
jg .L145
cmp $32,%rax
je .L145
mov -112(%rbp),%r15
mov %rax,%r13
.L109: cmp $15,%r13
jle .L119
mov -144(%rbp),%r10
mov -120(%rbp),%r12
.L125: mov %r13,%rdx
sar %rdx
test %r10,%r10
jle .L120
lea (%rdx,%rdx),%rcx
lea -1(%rdx),%r9
lea (%rcx,%rdx),%rsi
lea 0(,%rdx,4),%rbx
xor %r8d,%r8d
and $-8,%r9
.p2align 4,,10
.p2align 3
.L121: lea (%rdx,%r8),%rax
cmp %rax,%r8
jge .L124
lea (%r9,%r8),%rdi
lea (%r11,%r8,4),%rax
lea (%r12,%rdi,4),%rdi
.p2align 4,,10
.p2align 3
.L122: vmovdqu (%rax,%rcx,4),%ymm1
vmovdqu (%rax),%ymm0
vmovdqu (%rax,%rsi,4),%ymm4
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu (%rax,%rdx,4),%ymm1
vpminsd %ymm4,%ymm1,%ymm3
vpmaxsd %ymm4,%ymm1,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,(%rax)
vmovdqu %ymm2,(%rax,%rdx,4)
vmovdqu %ymm3,(%rax,%rcx,4)
vmovdqu %ymm0,(%rax,%rsi,4)
add $32,%rax
cmp %rax,%rdi
jne .L122
.L124: add %rbx,%r8
cmp %r8,%r10
jg .L121
.L120: sar $2,%r13
cmp $15,%r13
jg .L125
mov %r13,-80(%rbp)
.L119: cmpq $8,-80(%rbp)
je .L126
.L129: test %r15,%r15
jle .L192
lea (%r15,%r15),%rsi
lea (%rsi,%r15),%rdx
lea 0(,%r15,4),%rcx
lea 0(,%r15,8),%rax
mov -208(%rbp),%r9
lea (%rcx,%r15),%r8
lea (%rdx,%rdx),%rdi
sub %r15,%rax
vpcmpeqd %ymm6,%ymm6,%ymm6
.L132: vmovdqu (%r9,%r15,4),%ymm1
vmovdqu (%r9),%ymm0
vmovdqu (%r9,%r8,4),%ymm8
vpmaxsd %ymm0,%ymm1,%ymm4
vpminsd %ymm0,%ymm1,%ymm5
vmovdqu (%r9,%rdx,4),%ymm0
vmovdqu (%r9,%rsi,4),%ymm1
vmovdqu (%r9,%rdi,4),%ymm7
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm1
vmovdqu (%r9,%rcx,4),%ymm0
mov -164(%rbp),%r10d
vpminsd %ymm0,%ymm8,%ymm2
vpmaxsd %ymm0,%ymm8,%ymm8
vmovdqu (%r9,%rax,4),%ymm0
vpminsd %ymm7,%ymm0,%ymm10
vpmaxsd %ymm7,%ymm0,%ymm0
vpminsd %ymm10,%ymm2,%ymm9
vpminsd %ymm3,%ymm5,%ymm7
vpmaxsd %ymm10,%ymm2,%ymm2
vpmaxsd %ymm3,%ymm5,%ymm5
vpminsd %ymm1,%ymm4,%ymm3
vpmaxsd %ymm1,%ymm4,%ymm1
vpminsd %ymm0,%ymm8,%ymm4
vpmaxsd %ymm0,%ymm8,%ymm8
vpminsd %ymm4,%ymm3,%ymm11
vpminsd %ymm9,%ymm7,%ymm0
vpmaxsd %ymm4,%ymm3,%ymm3
vpmaxsd %ymm9,%ymm7,%ymm7
vpminsd %ymm8,%ymm1,%ymm4
vpminsd %ymm2,%ymm5,%ymm9
vpmaxsd %ymm8,%ymm1,%ymm1
vpmaxsd %ymm2,%ymm5,%ymm2
vpunpckldq %ymm3,%ymm11,%ymm10
vpunpckhdq %ymm2,%ymm9,%ymm5
vpunpckhdq %ymm3,%ymm11,%ymm3
vpunpckldq %ymm7,%ymm0,%ymm8
vpunpckldq %ymm2,%ymm9,%ymm11
vpunpckhdq %ymm7,%ymm0,%ymm0
vpunpckldq %ymm1,%ymm4,%ymm9
vpunpckhdq %ymm1,%ymm4,%ymm4
vpunpcklqdq %ymm5,%ymm0,%ymm2
vpunpcklqdq %ymm9,%ymm10,%ymm13
vpunpcklqdq %ymm4,%ymm3,%ymm12
vpunpcklqdq %ymm11,%ymm8,%ymm7
vpunpckhqdq %ymm9,%ymm10,%ymm1
vpunpckhqdq %ymm11,%ymm8,%ymm8
vpunpckhqdq %ymm4,%ymm3,%ymm4
vpunpckhqdq %ymm5,%ymm0,%ymm0
vperm2i128 $32,%ymm12,%ymm2,%ymm10
vperm2i128 $32,%ymm1,%ymm8,%ymm9
vperm2i128 $32,%ymm4,%ymm0,%ymm5
vperm2i128 $32,%ymm13,%ymm7,%ymm11
vperm2i128 $49,%ymm13,%ymm7,%ymm3
vperm2i128 $49,%ymm12,%ymm2,%ymm2
vperm2i128 $49,%ymm1,%ymm8,%ymm1
vperm2i128 $49,%ymm4,%ymm0,%ymm0
test %r10d,%r10d
je .L131
vpxor %ymm6,%ymm11,%ymm11
vpxor %ymm6,%ymm10,%ymm10
vpxor %ymm6,%ymm9,%ymm9
vpxor %ymm6,%ymm5,%ymm5
vpxor %ymm6,%ymm3,%ymm3
vpxor %ymm6,%ymm2,%ymm2
vpxor %ymm6,%ymm1,%ymm1
vpxor %ymm6,%ymm0,%ymm0
.L131: vmovdqu %ymm11,(%r9)
vmovdqu %ymm3,(%r9,%r15,4)
vmovdqu %ymm10,(%r9,%rsi,4)
vmovdqu %ymm2,(%r9,%rdx,4)
vmovdqu %ymm9,(%r9,%rcx,4)
vmovdqu %ymm1,(%r9,%r8,4)
vmovdqu %ymm5,(%r9,%rdi,4)
vmovdqu %ymm0,(%r9,%rax,4)
add $32,%r9
cmp %r9,-160(%rbp)
jne .L132
.L192: vzeroupper
.L190: add $200,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
lea -16(%r13),%rsp
pop %r13
ret
.L59: dec %rax
mov %rax,-176(%rbp)
shr $5,%rax
sal $7,%rax
lea 128(%r11,%rax),%rax
mov %rax,-184(%rbp)
vpcmpeqd %ymm0,%ymm0,%ymm0
mov %r11,%rax
.L64: vpxor 64(%rax),%ymm0,%ymm1
vpxor (%rax),%ymm0,%ymm2
vmovdqu %ymm1,64(%rax)
vmovdqu %ymm2,(%rax)
sub $-128,%rax
cmp -184(%rbp),%rax
jne .L64
mov -176(%rbp),%rdi
lea 64(%r11),%rsi
mov %rdi,%rax
shr $4,%rax
sal $6,%rax
add %rsi,%rax
mov %rax,-208(%rbp)
mov %rdi,%rax
shr $6,%rax
sal $8,%rax
lea 256(%r11,%rax),%rax
mov $4,%ecx
mov %r14,%r8
mov %rsi,-192(%rbp)
mov %rax,-216(%rbp)
movq $8,-112(%rbp)
vpcmpeqd %ymm11,%ymm11,%ymm11
mov %r10,%r14
cmp $64,%rcx
je .L200
.L68: cmp $32,%rcx
je .L201
cmp $16,%rcx
je .L74
cmp $8,%rcx
je .L202
.L76: mov -112(%rbp),%rdi
xor %edx,%edx
lea (%rdi,%rdi),%rax
cmp %r15,%rax
mov %rax,-152(%rbp)
setne %al
movzbl %al,%eax
mov %eax,-160(%rbp)
lea -1(%rdi),%rax
sete %dl
and $-8,%rax
movq $0,-136(%rbp)
mov %rax,-128(%rbp)
mov %rdi,%r10
test %r15,%r15
jle .L73
.L78: mov -112(%rbp),%rax
mov -136(%rbp),%rdi
add %r10,%rax
cmp %rdi,%rax
jle .L81
mov %rdi,%rsi
.p2align 4,,10
.p2align 3
.L84: mov %rsi,%rcx
mov %rsi,%rdi
add -112(%rbp),%rsi
cmp %rsi,%rcx
jge .L83
lea (%r11,%rcx,4),%rax
mov %rax,-80(%rbp)
mov -120(%rbp),%rax
add -128(%rbp),%rcx
lea (%rax,%rcx,4),%rcx
mov -80(%rbp),%rax
.p2align 4,,10
.p2align 3
.L80: vmovdqu (%rax),%ymm0
vmovdqu (%rax,%r15,4),%ymm15
vmovdqu (%rax,%r13,4),%ymm7
vpminsd %ymm0,%ymm15,%ymm6
vpmaxsd %ymm0,%ymm15,%ymm15
vmovdqu (%rax,%r12,4),%ymm0
vmovdqu (%rax,%r14,4),%ymm5
vpminsd %ymm0,%ymm7,%ymm1
vpmaxsd %ymm0,%ymm7,%ymm7
vmovdqu (%rax,%rbx,4),%ymm0
vmovdqu (%rax,%r9,4),%ymm4
vpminsd %ymm0,%ymm5,%ymm9
vpmaxsd %ymm0,%ymm5,%ymm5
vmovdqu (%rax,%r8,4),%ymm0
vpminsd %ymm1,%ymm6,%ymm8
vpminsd %ymm0,%ymm4,%ymm3
vpmaxsd %ymm0,%ymm4,%ymm4
vpminsd %ymm3,%ymm9,%ymm2
vpmaxsd %ymm4,%ymm5,%ymm0
vpmaxsd %ymm3,%ymm9,%ymm3
vpmaxsd %ymm1,%ymm6,%ymm6
vpminsd %ymm7,%ymm15,%ymm1
vpmaxsd %ymm7,%ymm15,%ymm15
vpminsd %ymm4,%ymm5,%ymm7
vpminsd %ymm2,%ymm8,%ymm14
vpminsd %ymm7,%ymm1,%ymm13
vpminsd %ymm3,%ymm6,%ymm12
vpminsd %ymm0,%ymm15,%ymm10
vpmaxsd %ymm3,%ymm6,%ymm6
vpmaxsd %ymm2,%ymm8,%ymm2
vpmaxsd %ymm7,%ymm1,%ymm1
vpmaxsd %ymm0,%ymm15,%ymm0
vmovdqa %ymm6,-80(%rbp)
vmovdqa %ymm6,%ymm3
vmovdqa %ymm14,%ymm9
vmovdqa %ymm2,%ymm5
vmovdqa %ymm13,%ymm8
vmovdqa %ymm1,%ymm4
vmovdqa %ymm12,%ymm7
vmovdqa %ymm10,%ymm6
vmovdqa %ymm0,%ymm15
test %edx,%edx
je .L79
vpxor -80(%rbp),%ymm11,%ymm3
vpxor %ymm14,%ymm11,%ymm9
vpxor %ymm13,%ymm11,%ymm8
vpxor %ymm12,%ymm11,%ymm7
vpxor %ymm10,%ymm11,%ymm6
vpxor %ymm2,%ymm11,%ymm5
vpxor %ymm1,%ymm11,%ymm4
vpxor %ymm0,%ymm11,%ymm15
.L79: vmovdqu %ymm9,(%rax)
vmovdqu %ymm8,(%rax,%r15,4)
vmovdqu %ymm7,(%rax,%r12,4)
vmovdqu %ymm6,(%rax,%r13,4)
vmovdqu %ymm5,(%rax,%rbx,4)
vmovdqu %ymm4,(%rax,%r14,4)
vmovdqu %ymm3,(%rax,%r8,4)
vmovdqu %ymm15,(%rax,%r9,4)
add $32,%rax
cmp %rax,%rcx
jne .L80
.L83: xor $1,%edx
cmp %rdi,%r10
jg .L84
.L81: mov -152(%rbp),%rdi
xor -160(%rbp),%edx
add %rdi,-136(%rbp)
add %rdi,%r10
mov -136(%rbp),%rax
cmp %rax,%r15
jg .L78
.L73: mov -112(%rbp),%rax
sal $4,%rax
cmp -144(%rbp),%rax
je .L203
mov -152(%rbp),%rax
mov %rax,%rcx
sar %rcx
cmp $254,%rax
jle .L66
mov %r8,-136(%rbp)
mov %r9,-160(%rbp)
mov %r15,-80(%rbp)
mov -144(%rbp),%r15
mov %rbx,-112(%rbp)
mov %r12,-128(%rbp)
mov %rcx,%rbx
mov %r11,%r12
.L67: mov %rbx,%rdx
sar $2,%rdx
mov %r15,%rsi
mov %r12,%rdi
vzeroupper
sar $3,%rbx
call int32_threestages
cmp $127,%rbx
vpcmpeqd %ymm11,%ymm11,%ymm11
jg .L67
mov %rbx,%rcx
mov %r12,%r11
mov -80(%rbp),%r15
mov -112(%rbp),%rbx
mov -128(%rbp),%r12
mov -136(%rbp),%r8
mov -160(%rbp),%r9
.L66: mov -152(%rbp),%rax
mov %rax,-112(%rbp)
cmp $64,%rcx
jne .L68
.L200: mov -144(%rbp),%rsi
mov %r11,%rdi
vzeroupper
call int32_twostages_32
vpcmpeqd %ymm11,%ymm11,%ymm11
.L74: mov %r11,%rax
.L69: vmovdqu 64(%rax),%ymm1
vmovdqu (%rax),%ymm0
vmovdqu 96(%rax),%ymm4
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu 32(%rax),%ymm1
sub $-128,%rax
vpminsd %ymm4,%ymm1,%ymm3
vpmaxsd %ymm4,%ymm1,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,-128(%rax)
vmovdqu %ymm2,-96(%rax)
vmovdqu %ymm3,-64(%rax)
vmovdqu %ymm0,-32(%rax)
cmp -184(%rbp),%rax
jne .L69
jmp .L76
.L202: mov %r11,%rax
.L77: vmovdqu 32(%rax),%ymm0
vmovdqu (%rax),%ymm1
add $64,%rax
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm2,-64(%rax)
vmovdqu %ymm0,-32(%rax)
cmp %rax,-208(%rbp)
jne .L77
jmp .L76
.L203: mov %r14,%r10
mov %r8,%r14
jmp .L60
.L201: mov %r11,%rax
.L71: vmovdqu 128(%rax),%ymm0
vmovdqu (%rax),%ymm6
vmovdqu 32(%rax),%ymm1
vpminsd %ymm0,%ymm6,%ymm7
vpmaxsd %ymm0,%ymm6,%ymm6
vmovdqu 160(%rax),%ymm0
vmovdqu 224(%rax),%ymm8
vpminsd %ymm0,%ymm1,%ymm5
vpmaxsd %ymm0,%ymm1,%ymm3
vmovdqu 192(%rax),%ymm1
vmovdqu 64(%rax),%ymm0
add $256,%rax
vpminsd %ymm1,%ymm0,%ymm4
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu -160(%rax),%ymm1
vpminsd %ymm8,%ymm1,%ymm2
vpmaxsd %ymm8,%ymm1,%ymm1
vpminsd %ymm4,%ymm7,%ymm8
vpmaxsd %ymm4,%ymm7,%ymm4
vpminsd %ymm2,%ymm5,%ymm7
vpmaxsd %ymm2,%ymm5,%ymm2
vpminsd %ymm0,%ymm6,%ymm5
vpmaxsd %ymm0,%ymm6,%ymm0
vpminsd %ymm1,%ymm3,%ymm6
vpmaxsd %ymm1,%ymm3,%ymm1
vpminsd %ymm7,%ymm8,%ymm9
vpmaxsd %ymm7,%ymm8,%ymm3
vpminsd %ymm2,%ymm4,%ymm8
vpminsd %ymm6,%ymm5,%ymm7
vpmaxsd %ymm2,%ymm4,%ymm2
vpmaxsd %ymm6,%ymm5,%ymm5
vpminsd %ymm1,%ymm0,%ymm4
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm9,-256(%rax)
vmovdqu %ymm3,-224(%rax)
vmovdqu %ymm8,-192(%rax)
vmovdqu %ymm2,-160(%rax)
vmovdqu %ymm7,-128(%rax)
vmovdqu %ymm5,-96(%rax)
vmovdqu %ymm4,-64(%rax)
vmovdqu %ymm0,-32(%rax)
cmp %rax,-216(%rbp)
jne .L71
jmp .L76
.L194: mov 4(%rdi),%eax
# n == 8: scalar cmov sorting network (largest value lands at x[0])
mov 12(%rdi),%ebx
cmp (%rdi),%eax
mov %eax,%ecx
cmovg (%rdi),%ecx
mov (%rdi),%r9d
cmovg %eax,%r9d
cmp 8(%rdi),%ebx
mov %ebx,%eax
cmovg 8(%rdi),%eax
mov 8(%rdi),%edi
cmovg %ebx,%edi
cmp %ecx,%eax
mov %eax,%r8d
cmovg %ecx,%r8d
cmovle %ecx,%eax
cmp %r9d,%edi
mov %edi,%edx
cmovg %r9d,%edx
cmovg %edi,%r9d
cmp %eax,%edx
mov %edx,%r12d
cmovg %eax,%r12d
mov %eax,%r10d
cmovg %edx,%r10d
mov 20(%r11),%eax
cmp 16(%r11),%eax
mov %eax,%esi
cmovg 16(%r11),%esi
mov 16(%r11),%ecx
cmovg %eax,%ecx
mov 28(%r11),%eax
cmp 24(%r11),%eax
mov %eax,%edx
cmovg 24(%r11),%edx
mov 24(%r11),%edi
cmovg %eax,%edi
cmp %ecx,%edi
mov %edi,%eax
cmovg %ecx,%eax
cmovle %ecx,%edi
cmp %esi,%edx
mov %edx,%ecx
cmovg %esi,%ecx
cmovle %esi,%edx
cmp %r9d,%edi
mov %edi,%ebx
cmovg %r9d,%ebx
cmovle %r9d,%edi
cmp %edx,%eax
mov %eax,%esi
cmovg %edx,%esi
cmovg %eax,%edx
mov %edi,(%r11)
cmp %r12d,%esi
mov %esi,%r9d
cmovg %r12d,%r9d
cmovle %r12d,%esi
cmp %r10d,%edx
mov %edx,%eax
cmovg %r10d,%eax
cmovle %r10d,%edx
cmp %esi,%ebx
mov %ebx,%r13d
cmovg %esi,%r13d
cmovle %esi,%ebx
cmp %r8d,%ecx
mov %ecx,%esi
cmovg %r8d,%esi
cmovle %r8d,%ecx
cmp %ecx,%eax
mov %eax,%r8d
cmovg %ecx,%r8d
cmovle %ecx,%eax
mov %esi,28(%r11)
cmp %edx,%ebx
mov %ebx,%r12d
cmovg %edx,%r12d
mov %edx,%ecx
cmovg %ebx,%ecx
cmp %eax,%r13d
mov %r13d,%ebx
cmovg %eax,%ebx
mov %eax,%edx
cmovg %r13d,%edx
mov %ecx,4(%r11)
cmp %r8d,%r9d
mov %r9d,%r10d
cmovg %r8d,%r10d
mov %r8d,%eax
cmovg %r9d,%eax
mov %r12d,8(%r11)
mov %edx,12(%r11)
mov %ebx,16(%r11)
mov %eax,20(%r11)
mov %r10d,24(%r11)
jmp .L190
.L126: cmpq $0,-144(%rbp)
jle .L129
mov %r11,%rax
.L130: vmovdqu 32(%rax),%ymm1
vmovdqu (%rax),%ymm0
add $64,%rax
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm2,-64(%rax)
vmovdqu %ymm0,-32(%rax)
cmp %rax,-128(%rbp)
jne .L130
jmp .L129
.L195: vmovdqa .LC0(%rip),%ymm1
vmovdqa .LC1(%rip),%ymm3
vpxor 32(%rdi),%ymm1,%ymm2
vpxor (%rdi),%ymm1,%ymm1
mov -164(%rbp),%r14d
vpunpckldq %ymm2,%ymm1,%ymm0
vpunpckhdq %ymm2,%ymm1,%ymm1
vpunpcklqdq %ymm1,%ymm0,%ymm2
vpunpckhqdq %ymm1,%ymm0,%ymm0
vpminsd %ymm2,%ymm0,%ymm1
vpmaxsd %ymm2,%ymm0,%ymm0
vpxor %ymm3,%ymm1,%ymm1
vpxor %ymm3,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm1,%ymm4
vpunpckhdq %ymm0,%ymm1,%ymm0
vpmaxsd %ymm0,%ymm4,%ymm1
vpminsd %ymm0,%ymm4,%ymm2
vpunpcklqdq %ymm1,%ymm2,%ymm0
vpunpckhqdq %ymm1,%ymm2,%ymm2
vpunpckldq %ymm2,%ymm0,%ymm1
vpunpckhdq %ymm2,%ymm0,%ymm0
vpunpcklqdq %ymm0,%ymm1,%ymm4
vpunpckhqdq %ymm0,%ymm1,%ymm1
vpminsd %ymm4,%ymm1,%ymm2
vpmaxsd %ymm4,%ymm1,%ymm1
vpunpckldq %ymm1,%ymm2,%ymm0
vpunpckhdq %ymm1,%ymm2,%ymm1
vpxor %ymm3,%ymm1,%ymm1
vpxor %ymm3,%ymm0,%ymm0
vperm2i128 $32,%ymm1,%ymm0,%ymm2
vperm2i128 $49,%ymm1,%ymm0,%ymm0
vpminsd %ymm2,%ymm0,%ymm1
vpmaxsd %ymm2,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm1,%ymm3
vperm2i128 $49,%ymm0,%ymm1,%ymm0
vpminsd %ymm3,%ymm0,%ymm2
vpmaxsd %ymm3,%ymm0,%ymm0
vpunpcklqdq %ymm0,%ymm2,%ymm1
vpunpckhqdq %ymm0,%ymm2,%ymm2
vpunpckldq %ymm2,%ymm1,%ymm0
vpunpckhdq %ymm2,%ymm1,%ymm1
vpunpcklqdq %ymm1,%ymm0,%ymm3
vpunpckhqdq %ymm1,%ymm0,%ymm0
vpminsd %ymm3,%ymm0,%ymm2
vpmaxsd %ymm3,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm2,%ymm1
vpunpckhdq %ymm0,%ymm2,%ymm0
vpunpcklqdq %ymm0,%ymm1,%ymm2
vpunpckhqdq %ymm0,%ymm1,%ymm1
vpcmpeqd %ymm0,%ymm0,%ymm0
test %r14d,%r14d
je .L54
vpxor %ymm0,%ymm1,%ymm1
mov %edx,%esi
.L55: vmovdqa %ymm2,%ymm0
mov %r11,%rdi
.L193: add $200,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
lea -16(%r13),%rsp
pop %r13
jmp merge16_finish
.L197: lea -1(%r15),%rax
mov %rax,-200(%rbp)
lea 32(%rdi),%rax
mov %rax,-120(%rbp)
jmp .L62
.L196: mov $1,%edx
mov $16,%esi
mov %rdi,-80(%rbp)
call djbsort$avx2_2power
mov -80(%rbp),%r11
xor %edx,%edx
lea 64(%r11),%r12
mov $16,%esi
mov %r12,%rdi
call djbsort$avx2_2power
mov -80(%rbp),%r11
mov -164(%rbp),%r13d
vmovdqu (%r11),%ymm4
vmovdqu 32(%r11),%ymm1
vmovdqu 64(%r11),%ymm2
vmovdqu 96(%r11),%ymm3
test %r13d,%r13d
je .L57
vpcmpeqd %ymm0,%ymm0,%ymm0
vpxor %ymm0,%ymm4,%ymm4
vpxor %ymm0,%ymm1,%ymm1
vpxor %ymm0,%ymm2,%ymm2
vpxor %ymm0,%ymm3,%ymm3
.L57: mov -164(%rbp),%esi
vpmaxsd %ymm1,%ymm3,%ymm5
vpminsd %ymm4,%ymm2,%ymm0
vpminsd %ymm1,%ymm3,%ymm1
vpmaxsd %ymm4,%ymm2,%ymm4
mov %r11,%rdi
vmovdqa %ymm4,-112(%rbp)
vmovdqa %ymm5,-80(%rbp)
call merge16_finish
vmovdqa -80(%rbp),%ymm5
vmovdqa -112(%rbp),%ymm4
vmovdqa %ymm5,%ymm1
vmovdqa %ymm4,%ymm0
mov %r12,%rdi
jmp .L193
.L54: vpxor %ymm0,%ymm2,%ymm2
mov %edx,%esi
jmp .L55
.endfn djbsort$avx2_2power,globl
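
// Editor's sketch of the power-of-two kernel (illustrative; the n == 8
// branch above is its own scalar network). The vector code is a bitonic
// sorter: sort short runs with vpminsd/vpmaxsd columns, then repeatedly
// merge doubled runs, using the .LC0-.LC3 XOR masks to complement
// alternating lanes/runs so every merge step sees a bitonic input;
// flagdown appears to leave the complemented image in memory for the
// caller. minmax() as in the sketch after int32_twostages_32.
//
//	#include <stdint.h>
//	static void sort2power_ref(int32_t *x, long long n, int flagdown) {
//	  for (long long k = 2; k <= n; k += k)         // run length doubles
//	    for (long long j = k >> 1; j > 0; j >>= 1)  // minmax distance
//	      for (long long i = 0; i < n; ++i) {
//	        long long l = i ^ j;
//	        if (l <= i) continue;
//	        if ((i & k) == 0) minmax(&x[i], &x[l]); // ascending run
//	        else minmax(&x[l], &x[i]);              // descending run
//	      }
//	  if (flagdown)
//	    for (long long i = 0; i < n; ++i) x[i] = ~x[i];
//	}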
.p2align 4
djbsort$avx2:
push %rbp
mov %rsp,%rbp
push %r15
mov %rdi,%r15
push %r14
mov %rsi,%r14
push %r13
push %r12
push %rbx
and $-32,%rsp
sub $1056,%rsp
cmp $8,%rsi
jle .L265
blsr %rsi,%rax
je .L220
lea -8(%rsi),%rax
mov %rax,8(%rsp)
mov $8,%ebx
cmp $8,%rax
jle .L266
.p2align 4,,10
.p2align 3
.L221: mov %rbx,%rax
mov %r14,%r12
add %rbx,%rbx
sub %rbx,%r12
cmp %rbx,%r12
jg .L221
cmp $128,%rbx
jle .L267
mov $1,%edx
mov %rbx,%rsi
mov %r15,%rdi
call djbsort$avx2_2power
lea (%r15,%rbx,4),%rdi
mov %r12,%rsi
call djbsort$avx2
lea 32(%r15),%rax
mov %rax,16(%rsp)
jmp .L230
.p2align 4,,10
.p2align 3
.L228: lea 0(%r13,%r12),%rdx
add %r11,%r12
sub %r9,%rdx
lea (%r15,%r12,4),%rsi
mov %r8,%rdi
sar $3,%rbx
call minmax_vector
cmp $63,%rbx
jle .L268
.L230: mov %rbx,%r12
sar $2,%r12
mov %r12,%rdx
mov %r14,%rsi
mov %r15,%rdi
call int32_threestages
lea 0(,%r12,4),%rcx
mov %r14,%rdx
sub %rcx,%rdx
lea (%rcx,%rax),%r13
lea (%r15,%rax,4),%r8
lea (%r15,%r13,4),%rsi
sub %rax,%rdx
mov %r8,%rdi
mov %rsi,24(%rsp)
mov %rax,%r9
mov %rax,%r11
call minmax_vector
cmp %r14,%r13
mov 24(%rsp),%rsi
lea (%r12,%r12),%r10
jle .L269
.L226: mov %r14,%r13
sub %r10,%r13
lea (%r11,%r12,2),%rax
mov %r13,%rdx
sub %r9,%rdx
lea (%r15,%rax,4),%rsi
mov %r8,%rdi
call minmax_vector
add %r9,%r10
cmp %r14,%r10
jg .L228
mov %r10,%rax
sub %r12,%rax
mov %r10,%r11
lea (%r15,%r10,4),%r8
cmp %rax,%r9
jge .L247
sub %r9,%rax
dec %rax
and $-8,%rax
lea (%r15,%r9,4),%rdx
add %rax,%r9
mov 16(%rsp),%rax
lea (%rax,%r9,4),%rax
.p2align 4,,10
.p2align 3
.L229: vmovdqu (%rdx,%r12,4),%ymm0
vmovdqu (%rdx),%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm2,(%rdx)
vmovdqu %ymm0,(%rdx,%r12,4)
add $32,%rdx
cmp %rdx,%rax
jne .L229
mov %r10,%r9
vzeroupper
jmp .L228
.p2align 4,,10
.p2align 3
.L267: mov %rbx,%rdx
sar $2,%rdx
sar $3,%rbx
lea 0(,%rax,4),%r12
lea 32(%rsp),%r13
cmp %rbx,%rdx
jle .L224
vmovdqa .LC4(%rip),%ymm0
.p2align 4,,10
.p2align 3
.L225: mov %rbx,%rax
sal $5,%rax
inc %rbx
vmovdqa %ymm0,0(%r13,%rax)
cmp %rdx,%rbx
jl .L225
vzeroupper
.L224: sal $2,%r14
mov %r14,%rdx
mov %r15,%rsi
mov %r13,%rdi
call memcpy
xor %edx,%edx
mov %r12,%rsi
mov %r13,%rdi
call djbsort$avx2_2power
mov %r14,%rdx
mov %r13,%rsi
mov %r15,%rdi
call memcpy
.L263: lea -40(%rbp),%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.p2align 4,,10
.p2align 3
.L269: lea (%r12,%r9),%rax
cmp %rax,%r9
jge .L246
notq %r11
add %r11,%rax
and $-8,%rax
add %rax,%r9
mov 16(%rsp),%rax
lea (%r10,%r12),%rdx
mov %r8,%rdi
lea (%rax,%r9,4),%rax
.p2align 4,,10
.p2align 3
.L227: vmovdqu (%rdi),%ymm1
vmovdqu (%rdi,%r10,4),%ymm0
vmovdqu (%rdi,%r12,4),%ymm4
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu (%rdi,%rdx,4),%ymm1
vpminsd %ymm4,%ymm1,%ymm3
vpmaxsd %ymm4,%ymm1,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,(%rdi)
vmovdqu %ymm2,(%rdi,%r12,4)
vmovdqu %ymm3,(%rdi,%r10,4)
vmovdqu %ymm0,(%rdi,%rdx,4)
add $32,%rdi
cmp %rdi,%rax
jne .L227
mov %rsi,%r8
mov %r13,%r11
mov %r13,%r9
vzeroupper
jmp .L226
.p2align 4,,10
.p2align 3
.L268: cmp $32,%rbx
je .L270
mov %r15,%r10
cmp $16,%rbx
je .L249
mov $32,%ebx
xor %r11d,%r11d
mov $15,%eax
xor %r9d,%r9d
.L237: cmp %rax,%r14
jle .L239
mov %r9,%rax
.p2align 4,,10
.p2align 3
.L240: vmovdqu 32(%r15,%rax,4),%ymm0
vmovdqu (%r15,%rax,4),%ymm2
mov %rax,%rdx
vpminsd %ymm0,%ymm2,%ymm1
vpmaxsd %ymm0,%ymm2,%ymm2
vperm2i128 $32,%ymm2,%ymm1,%ymm0
vperm2i128 $49,%ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm2,%ymm1
vperm2i128 $49,%ymm0,%ymm2,%ymm2
vpunpcklqdq %ymm2,%ymm1,%ymm0
vpunpckhqdq %ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm2,%ymm1
vpunpckhdq %ymm0,%ymm2,%ymm2
vpunpcklqdq %ymm2,%ymm1,%ymm0
vpunpckhqdq %ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm2,%ymm1
add $31,%rdx
vpunpckhdq %ymm0,%ymm2,%ymm0
vmovdqu %ymm1,(%r15,%rax,4)
vmovdqu %ymm0,32(%r15,%rax,4)
add $16,%rax
cmp %rdx,%r14
jg .L240
lea -16(%r14),%rax
sub %r9,%rax
lea 15(%r9),%rdx
and $-16,%rax
cmp %rdx,%r14
mov $0,%edx
cmovle %rdx,%rax
lea 16(%r9,%rax),%r9
mov %r9,%r11
lea 32(,%r9,4),%rbx
lea (%r15,%r9,4),%r10
vzeroupper
.L239: mov 8(%rsp),%rdx
lea (%r15,%rbx),%rsi
sub %r9,%rdx
mov %r10,%rdi
call minmax_vector
lea 16(,%r11,4),%rax
lea 7(%r9),%rdx
lea (%r15,%rax),%rsi
cmp %r14,%rdx
jge .L241
mov (%r10),%ebx
cmp (%rsi),%ebx
mov %ebx,%ecx
cmovg (%rsi),%ecx
mov (%rsi),%edx
cmovg %ebx,%edx
mov %ecx,(%r10)
mov %edx,(%rsi)
lea -12(%r15,%rax),%rbx
lea 4(%r15,%rax),%rdi
mov (%rbx),%edx
cmp (%rdi),%edx
mov %edx,%ecx
cmovg (%rdi),%ecx
cmovle (%rdi),%edx
mov %ecx,(%rbx)
mov %edx,(%rdi)
lea -8(%r15,%rax),%r11
lea 8(%r15,%rax),%rdx
mov (%r11),%ecx
cmp (%rdx),%ecx
mov %ecx,%r12d
cmovg (%rdx),%r12d
cmovle (%rdx),%ecx
mov %r12d,(%r11)
mov %ecx,(%rdx)
lea -4(%r15,%rax),%rcx
lea 12(%r15,%rax),%rax
mov (%rcx),%r13d
cmp (%rax),%r13d
mov %r13d,%r8d
cmovg (%rax),%r8d
cmovle (%rax),%r13d
mov %r8d,(%rcx)
mov %r13d,(%rax)
cmp %r12d,(%r10)
mov (%r10),%r13d
cmovg %r12d,%r13d
cmovg (%r10),%r12d
mov %r13d,(%r10)
mov %r12d,(%r11)
mov (%rbx),%r8d
cmp (%rcx),%r8d
mov %r8d,%r12d
cmovg (%rcx),%r12d
mov (%rcx),%r13d
cmovg %r8d,%r13d
mov %r12d,(%rbx)
mov %r13d,(%rcx)
cmp %r12d,(%r10)
mov (%r10),%r13d
cmovg %r12d,%r13d
cmovg (%r10),%r12d
mov %r13d,(%r10)
mov %r12d,(%rbx)
mov (%r11),%r8d
cmp (%rcx),%r8d
mov %r8d,%ebx
cmovg (%rcx),%ebx
mov (%rcx),%r10d
cmovg %r8d,%r10d
mov %ebx,(%r11)
mov %r10d,(%rcx)
lea 8(%r9),%r11
mov (%rsi),%ecx
cmp (%rdx),%ecx
mov %ecx,%r10d
cmovg (%rdx),%r10d
cmovle (%rdx),%ecx
mov %r10d,(%rsi)
mov %ecx,(%rdx)
mov (%rdi),%ebx
cmp (%rax),%ebx
mov %ebx,%ecx
cmovg (%rax),%ecx
mov (%rax),%r10d
cmovg %ebx,%r10d
mov %ecx,(%rdi)
mov %r10d,(%rax)
cmp %ecx,(%rsi)
mov (%rsi),%r10d
cmovg %ecx,%r10d
cmovg (%rsi),%ecx
mov %r10d,(%rsi)
mov %ecx,(%rdi)
mov (%rdx),%ecx
cmp (%rax),%ecx
mov %ecx,%esi
cmovg (%rax),%esi
cmovle (%rax),%ecx
mov %esi,(%rdx)
mov %ecx,(%rax)
lea 48(,%r9,4),%rax
lea (%r15,%rax),%rsi
lea -16(%r15,%rax),%r10
mov %r11,%r9
.L241: lea -4(%r14),%rdx
sub %r9,%rdx
mov %r10,%rdi
call minmax_vector
lea 3(%r9),%rax
cmp %r14,%rax
jge .L242
lea 8(,%r11,4),%rax
lea (%r15,%rax),%rdx
mov (%r10),%ecx
cmp (%rdx),%ecx
mov %ecx,%esi
cmovg (%rdx),%esi
cmovle (%rdx),%ecx
mov %esi,(%r10)
mov %ecx,(%rdx)
lea -4(%r15,%rax),%rsi
lea 4(%r15,%rax),%rax
mov (%rsi),%ebx
cmp (%rax),%ebx
mov %ebx,%ecx
cmovg (%rax),%ecx
mov (%rax),%edi
cmovg %ebx,%edi
mov %ecx,(%rsi)
mov %edi,(%rax)
cmp %ecx,(%r10)
mov (%r10),%edi
cmovg %ecx,%edi
cmovg (%r10),%ecx
mov %edi,(%r10)
mov %ecx,(%rsi)
add $4,%r9
mov (%rdx),%ecx
cmp (%rax),%ecx
mov %ecx,%esi
cmovg (%rax),%esi
cmovle (%rax),%ecx
mov %esi,(%rdx)
mov %ecx,(%rax)
.L242: lea 2(%r9),%rax
cmp %r14,%rax
jge .L243
lea 0(,%r9,4),%rax
lea (%r15,%rax),%rdx
lea 8(%r15,%rax),%rax
mov (%rdx),%ecx
cmp (%rax),%ecx
mov %ecx,%esi
cmovg (%rax),%esi
cmovle (%rax),%ecx
mov %esi,(%rdx)
mov %ecx,(%rax)
.L243: lea 1(%r9),%rax
cmp %r14,%rax
jge .L263
sal $2,%r9
lea (%r15,%r9),%rdx
lea 4(%r15,%r9),%rax
mov (%rdx),%ecx
cmp (%rax),%ecx
mov %ecx,%esi
cmovg (%rax),%esi
cmovle (%rax),%ecx
mov %esi,(%rdx)
mov %ecx,(%rax)
lea -40(%rbp),%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.p2align 4,,10
.p2align 3
.L265: je .L271
cmp $7,%rsi
je .L272
cmp $6,%rsi
je .L273
cmp $5,%rsi
je .L274
cmp $4,%rsi
je .L275
cmp $3,%rsi
je .L276
cmp $2,%rsi
jne .L263
mov (%rdi),%edx
mov 4(%rdi),%ecx
jmp .L217
.p2align 4,,10
.p2align 3
.L271: mov (%rdi),%ecx
# n == 8: bubble pass; running max in %ecx, pairwise mins spill to regs
cmp 4(%rdi),%ecx
mov %ecx,%eax
cmovg 4(%rdi),%eax
cmovle 4(%rdi),%ecx
mov %eax,%edx
cmp 8(%rdi),%ecx
mov %ecx,%eax
cmovg 8(%rdi),%eax
cmovle 8(%rdi),%ecx
mov %eax,%esi
cmp 12(%rdi),%ecx
mov %ecx,%eax
cmovg 12(%rdi),%eax
cmovle 12(%rdi),%ecx
mov %eax,%r9d
cmp 16(%rdi),%ecx
mov %ecx,%eax
cmovg 16(%rdi),%eax
cmovle 16(%rdi),%ecx
mov %eax,%edi
cmp 20(%r15),%ecx
mov %ecx,%eax
cmovg 20(%r15),%eax
cmovle 20(%r15),%ecx
mov %eax,%r8d
cmp 24(%r15),%ecx
mov %ecx,%eax
cmovg 24(%r15),%eax
cmovle 24(%r15),%ecx
mov %eax,%r10d
cmp 28(%r15),%ecx
mov %ecx,%eax
cmovg 28(%r15),%eax
cmovle 28(%r15),%ecx
mov %ecx,28(%r15)
mov %eax,%r11d
.L207: cmp %esi,%edx
# one bubble pass per tail length; each pass stores its max and falls through
mov %edx,%ecx
cmovg %esi,%ecx
mov %esi,%eax
cmovg %edx,%eax
cmp %r9d,%eax
mov %eax,%esi
cmovg %r9d,%esi
cmovle %r9d,%eax
cmp %edi,%eax
mov %eax,%r9d
cmovg %edi,%r9d
cmovle %edi,%eax
cmp %r8d,%eax
mov %eax,%edi
cmovg %r8d,%edi
cmovle %r8d,%eax
cmp %r10d,%eax
mov %eax,%r8d
cmovg %r10d,%r8d
cmovle %r10d,%eax
cmp %r11d,%eax
mov %eax,%r10d
cmovg %r11d,%r10d
cmovle %r11d,%eax
mov %eax,24(%r15)
mov %ecx,%edx
.L209: cmp %esi,%edx
mov %edx,%ecx
cmovg %esi,%ecx
cmovle %esi,%edx
cmp %r9d,%edx
mov %edx,%esi
cmovg %r9d,%esi
mov %r9d,%eax
cmovg %edx,%eax
cmp %edi,%eax
mov %eax,%r9d
cmovg %edi,%r9d
cmovle %edi,%eax
cmp %r8d,%eax
mov %eax,%edi
cmovg %r8d,%edi
cmovle %r8d,%eax
cmp %r10d,%eax
mov %eax,%r8d
cmovg %r10d,%r8d
cmovle %r10d,%eax
mov %eax,20(%r15)
.L211: cmp %esi,%ecx
mov %ecx,%edx
cmovg %esi,%edx
cmovle %esi,%ecx
cmp %r9d,%ecx
mov %ecx,%esi
cmovg %r9d,%esi
mov %r9d,%eax
cmovg %ecx,%eax
mov %esi,%ecx
cmp %edi,%eax
mov %eax,%esi
cmovg %edi,%esi
cmovle %edi,%eax
cmp %r8d,%eax
mov %eax,%edi
cmovg %r8d,%edi
cmovle %r8d,%eax
mov %eax,16(%r15)
.L213: cmp %ecx,%edx
mov %edx,%eax
cmovg %ecx,%eax
cmovle %ecx,%edx
cmp %esi,%edx
mov %edx,%ecx
cmovg %esi,%ecx
cmovle %esi,%edx
cmp %edi,%edx
mov %edx,%esi
cmovg %edi,%esi
cmovle %edi,%edx
mov %edx,12(%r15)
.L215: cmp %ecx,%eax
mov %eax,%edx
cmovg %ecx,%edx
cmovle %ecx,%eax
cmp %esi,%eax
mov %eax,%ecx
cmovg %esi,%ecx
cmovle %esi,%eax
mov %eax,8(%r15)
.L217: cmp %ecx,%edx
mov %edx,%eax
cmovg %ecx,%eax
cmovle %ecx,%edx
mov %eax,(%r15)
mov %edx,4(%r15)
lea -40(%rbp),%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.L249: mov $64,%r12d
mov $32,%ebx
xor %r11d,%r11d
mov $31,%r8d
xor %r9d,%r9d
.L236: lea (%r15,%r9,4),%rax
mov %r9,%rcx
cmp %r8,%r14
jle .L235
.p2align 4,,10
.p2align 3
.L238: vmovdqu 64(%rax),%ymm1
vmovdqu 96(%rax),%ymm3
vmovdqu (%rax),%ymm0
vmovdqu 32(%rax),%ymm2
vpminsd %ymm1,%ymm0,%ymm5
vpmaxsd %ymm1,%ymm0,%ymm0
vpminsd %ymm3,%ymm2,%ymm1
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm2,%ymm0,%ymm4
vpminsd %ymm1,%ymm5,%ymm3
vpmaxsd %ymm2,%ymm0,%ymm0
vpmaxsd %ymm1,%ymm5,%ymm5
vperm2i128 $32,%ymm0,%ymm4,%ymm2
vperm2i128 $32,%ymm5,%ymm3,%ymm1
vperm2i128 $49,%ymm0,%ymm4,%ymm0
vperm2i128 $49,%ymm5,%ymm3,%ymm3
vpminsd %ymm3,%ymm1,%ymm5
vpminsd %ymm0,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm1,%ymm1
vpmaxsd %ymm0,%ymm2,%ymm0
vperm2i128 $32,%ymm1,%ymm5,%ymm3
vperm2i128 $32,%ymm0,%ymm4,%ymm2
vperm2i128 $49,%ymm1,%ymm5,%ymm5
vperm2i128 $49,%ymm0,%ymm4,%ymm4
vpunpcklqdq %ymm5,%ymm3,%ymm1
vpunpcklqdq %ymm4,%ymm2,%ymm0
vpunpckhqdq %ymm5,%ymm3,%ymm3
vpunpckhqdq %ymm4,%ymm2,%ymm2
vpminsd %ymm3,%ymm1,%ymm5
vpminsd %ymm2,%ymm0,%ymm4
vpmaxsd %ymm3,%ymm1,%ymm1
vpmaxsd %ymm2,%ymm0,%ymm0
vpunpckldq %ymm1,%ymm5,%ymm3
vpunpckldq %ymm0,%ymm4,%ymm2
vpunpckhdq %ymm1,%ymm5,%ymm5
vpunpckhdq %ymm0,%ymm4,%ymm4
vpunpcklqdq %ymm5,%ymm3,%ymm1
vpunpcklqdq %ymm4,%ymm2,%ymm0
vpunpckhqdq %ymm5,%ymm3,%ymm3
vpunpckhqdq %ymm4,%ymm2,%ymm2
mov %rcx,%rdx
vpminsd %ymm3,%ymm1,%ymm4
vpmaxsd %ymm3,%ymm1,%ymm1
vpminsd %ymm2,%ymm0,%ymm3
vpmaxsd %ymm2,%ymm0,%ymm0
vpunpckldq %ymm1,%ymm4,%ymm5
vpunpckldq %ymm0,%ymm3,%ymm2
vpunpckhdq %ymm1,%ymm4,%ymm1
vpunpckhdq %ymm0,%ymm3,%ymm0
add $63,%rdx
vmovdqu %ymm5,(%rax)
vmovdqu %ymm1,32(%rax)
vmovdqu %ymm2,64(%rax)
vmovdqu %ymm0,96(%rax)
add $32,%rcx
sub $-128,%rax
cmp %rdx,%r14
jg .L238
lea -32(%r14),%rax
sub %r9,%rax
lea 31(%r9),%rdx
and $-32,%rax
cmp %rdx,%r14
mov $0,%edx
cmovle %rdx,%rax
lea 32(%r9,%rax),%r9
lea 64(,%r9,4),%r12
mov %r9,%r11
lea (%r15,%r9,4),%r10
lea -32(%r12),%rbx
vzeroupper
.L235: lea -16(%r14),%rdx
sub %r9,%rdx
lea (%r15,%r12),%rsi
mov %r10,%rdi
call minmax_vector
lea 15(%r9),%rax
jmp .L237
.p2align 4,,10
.p2align 3
.L220: xor %edx,%edx
call djbsort$avx2_2power
lea -40(%rbp),%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.p2align 4,,10
.p2align 3
.L246: mov %rsi,%r8
mov %r13,%r11
mov %r13,%r9
jmp .L226
.p2align 4,,10
.p2align 3
.L247: mov %r10,%r9
jmp .L228
.p2align 4,,10
.p2align 3
.L266: vmovdqa .LC4(%rip),%ymm0
mov $16,%r12d
lea 32(%rsp),%r13
vmovdqa %ymm0,64(%rsp)
vzeroupper
jmp .L224
.p2align 4,,10
.p2align 3
.L270: cmp $63,%r14
jle .L248
lea -64(%r14),%rcx
shr $6,%rcx
mov %rcx,%rdx
sal $8,%rdx
mov %r15,%rax
lea 256(%r15,%rdx),%rdx
.p2align 4,,10
.p2align 3
.L233: vmovdqu 128(%rax),%ymm0
vmovdqu (%rax),%ymm3
vmovdqu 32(%rax),%ymm15
vpminsd %ymm0,%ymm3,%ymm13
vpmaxsd %ymm0,%ymm3,%ymm3
vmovdqu 160(%rax),%ymm0
vmovdqu 224(%rax),%ymm2
vmovdqu 64(%rax),%ymm6
vmovdqu 96(%rax),%ymm5
vpminsd %ymm0,%ymm15,%ymm4
vpmaxsd %ymm0,%ymm15,%ymm15
vmovdqu 192(%rax),%ymm0
add $256,%rax
vpminsd %ymm0,%ymm6,%ymm1
vpmaxsd %ymm0,%ymm6,%ymm6
vpminsd %ymm2,%ymm5,%ymm0
vpmaxsd %ymm2,%ymm5,%ymm5
vpminsd %ymm0,%ymm4,%ymm11
vpminsd %ymm1,%ymm13,%ymm14
vpmaxsd %ymm0,%ymm4,%ymm4
vpminsd %ymm5,%ymm15,%ymm12
vpminsd %ymm6,%ymm3,%ymm0
vpmaxsd %ymm5,%ymm15,%ymm15
vpmaxsd %ymm6,%ymm3,%ymm3
vpmaxsd %ymm1,%ymm13,%ymm13
vpminsd %ymm15,%ymm3,%ymm8
vpminsd %ymm4,%ymm13,%ymm1
vpminsd %ymm12,%ymm0,%ymm5
vpmaxsd %ymm4,%ymm13,%ymm13
vpminsd %ymm11,%ymm14,%ymm2
vpmaxsd %ymm12,%ymm0,%ymm12
vpmaxsd %ymm11,%ymm14,%ymm14
vpmaxsd %ymm15,%ymm3,%ymm3
vperm2i128 $32,%ymm14,%ymm2,%ymm11
vperm2i128 $32,%ymm13,%ymm1,%ymm10
vperm2i128 $32,%ymm12,%ymm5,%ymm9
vperm2i128 $49,%ymm12,%ymm5,%ymm0
vperm2i128 $32,%ymm3,%ymm8,%ymm4
vperm2i128 $49,%ymm14,%ymm2,%ymm2
vperm2i128 $49,%ymm13,%ymm1,%ymm1
vperm2i128 $49,%ymm3,%ymm8,%ymm3
vpminsd %ymm2,%ymm11,%ymm15
vpminsd %ymm1,%ymm10,%ymm14
vpmaxsd %ymm2,%ymm11,%ymm2
vpmaxsd %ymm1,%ymm10,%ymm1
vpminsd %ymm0,%ymm9,%ymm13
vpminsd %ymm3,%ymm4,%ymm12
vpmaxsd %ymm0,%ymm9,%ymm0
vpmaxsd %ymm3,%ymm4,%ymm8
vperm2i128 $49,%ymm2,%ymm15,%ymm11
vperm2i128 $49,%ymm1,%ymm14,%ymm10
vperm2i128 $49,%ymm0,%ymm13,%ymm9
vperm2i128 $32,%ymm2,%ymm15,%ymm7
vperm2i128 $32,%ymm1,%ymm14,%ymm6
vperm2i128 $32,%ymm0,%ymm13,%ymm5
vperm2i128 $32,%ymm8,%ymm12,%ymm4
vperm2i128 $49,%ymm8,%ymm12,%ymm8
vpunpcklqdq %ymm11,%ymm7,%ymm3
vpunpcklqdq %ymm10,%ymm6,%ymm2
vpunpcklqdq %ymm9,%ymm5,%ymm1
vpunpcklqdq %ymm8,%ymm4,%ymm0
vpunpckhqdq %ymm11,%ymm7,%ymm7
vpunpckhqdq %ymm10,%ymm6,%ymm6
vpunpckhqdq %ymm9,%ymm5,%ymm5
vpunpckhqdq %ymm8,%ymm4,%ymm4
vpminsd %ymm3,%ymm7,%ymm11
vpminsd %ymm2,%ymm6,%ymm10
vpminsd %ymm1,%ymm5,%ymm9
vpminsd %ymm0,%ymm4,%ymm8
vpmaxsd %ymm3,%ymm7,%ymm7
vpmaxsd %ymm2,%ymm6,%ymm6
vpmaxsd %ymm1,%ymm5,%ymm5
vpmaxsd %ymm0,%ymm4,%ymm4
vpunpckldq %ymm7,%ymm11,%ymm3
vpunpckldq %ymm6,%ymm10,%ymm2
vpunpckhdq %ymm7,%ymm11,%ymm7
vpunpckhdq %ymm6,%ymm10,%ymm6
vpunpckldq %ymm5,%ymm9,%ymm1
vpunpckldq %ymm4,%ymm8,%ymm0
vpunpckhdq %ymm5,%ymm9,%ymm5
vpunpckhdq %ymm4,%ymm8,%ymm4
vpunpcklqdq %ymm7,%ymm3,%ymm10
vpunpcklqdq %ymm5,%ymm1,%ymm8
vpunpckhqdq %ymm7,%ymm3,%ymm3
vpunpcklqdq %ymm6,%ymm2,%ymm9
vpunpcklqdq %ymm4,%ymm0,%ymm7
vpunpckhqdq %ymm6,%ymm2,%ymm2
vpunpckhqdq %ymm5,%ymm1,%ymm1
vpunpckhqdq %ymm4,%ymm0,%ymm0
vpminsd %ymm8,%ymm1,%ymm5
vpminsd %ymm9,%ymm2,%ymm6
vpminsd %ymm7,%ymm0,%ymm4
vpminsd %ymm10,%ymm3,%ymm11
vpmaxsd %ymm8,%ymm1,%ymm1
vpmaxsd %ymm7,%ymm0,%ymm0
vpmaxsd %ymm10,%ymm3,%ymm3
vpmaxsd %ymm9,%ymm2,%ymm2
vpunpckldq %ymm2,%ymm6,%ymm7
vpunpckldq %ymm3,%ymm11,%ymm8
vpunpckhdq %ymm2,%ymm6,%ymm2
vpunpckhdq %ymm3,%ymm11,%ymm3
vpunpckldq %ymm1,%ymm5,%ymm6
vpunpckhdq %ymm1,%ymm5,%ymm1
vpunpckldq %ymm0,%ymm4,%ymm5
vpunpckhdq %ymm0,%ymm4,%ymm0
vmovdqu %ymm8,-256(%rax)
vmovdqu %ymm3,-224(%rax)
vmovdqu %ymm7,-192(%rax)
vmovdqu %ymm2,-160(%rax)
vmovdqu %ymm6,-128(%rax)
vmovdqu %ymm1,-96(%rax)
vmovdqu %ymm5,-64(%rax)
vmovdqu %ymm0,-32(%rax)
cmp %rax,%rdx
jne .L233
lea 1(%rcx),%rax
mov %rax,%r9
sal $6,%r9
lea 128(,%r9,4),%rcx
sal $8,%rax
mov %r9,%r11
lea (%r15,%rax),%r10
lea -96(%rcx),%rbx
lea -64(%rcx),%r12
lea 31(%r9),%r8
vzeroupper
.L232: lea -32(%r14),%rdx
sub %r9,%rdx
lea (%r15,%rcx),%rsi
mov %r10,%rdi
call minmax_vector
jmp .L236
.L272: mov (%rdi),%edx
mov 4(%rdi),%esi
mov 8(%rdi),%r9d
mov 16(%r15),%r8d
mov 12(%rdi),%edi
mov 20(%r15),%r10d
mov 24(%r15),%r11d
jmp .L207
.L248: mov %r15,%r10
mov $64,%r12d
mov $32,%ebx
mov $31,%r8d
mov $128,%ecx
xor %r11d,%r11d
xor %r9d,%r9d
jmp .L232
.L276: mov (%rdi),%eax
mov 4(%rdi),%ecx
mov 8(%rdi),%esi
jmp .L215
.L275: mov (%rdi),%edx
mov 4(%rdi),%ecx
mov 8(%rdi),%esi
mov 12(%rdi),%edi
jmp .L213
.L274: mov (%rdi),%ecx
mov 4(%rdi),%esi
mov 8(%rdi),%r9d
mov 16(%r15),%r8d
mov 12(%rdi),%edi
jmp .L211
.L273: mov (%rdi),%edx
mov 4(%rdi),%esi
mov 8(%rdi),%r9d
mov 16(%r15),%r8d
mov 12(%rdi),%edi
mov 20(%r15),%r10d
jmp .L209
.endfn djbsort$avx2,globl
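
// Editor's sketch of the driver (illustrative names): exact powers of two
// go straight to djbsort$avx2_2power; n <= 8 uses the scalar cmov chains;
// small non-powers are memcpy'd into a stack buffer padded with INT32_MAX
// (.LC4) up to a power of two, sorted there, and copied back; large
// non-powers sort the biggest power-of-two prefix, recurse on the tail,
// then merge. The real code merges in place with comparator passes
// (int32_threestages + minmax_vector); a plain two-pointer merge stands
// in for that below. minmax()/sort2power_ref() as sketched above.
//
//	#include <stdint.h>
//	#include <stdlib.h>
//	#include <string.h>
//	void djbsort_ref(int32_t *x, long long n) {
//	  if (n < 2) return;
//	  if (n <= 8) {                        // scalar chains (.L271 etc.)
//	    for (long long i = n; i > 1; --i)  // bubble the max to the end
//	      for (long long j = 0; j + 1 < i; ++j)
//	        minmax(&x[j], &x[j + 1]);
//	    return;
//	  }
//	  if ((n & (n - 1)) == 0) {            // power of two
//	    sort2power_ref(x, n, 0);
//	    return;
//	  }
//	  long long p = 8;
//	  while (n - p > p) p += p;            // largest power-of-two prefix
//	  sort2power_ref(x, p, 0);             // the asm passes flagdown=1 here
//	  djbsort_ref(x + p, n - p);
//	  int32_t *t = malloc((size_t)n * sizeof *t);
//	  if (!t) abort();
//	  long long i = 0, j = p, k = 0;
//	  while (i < p && j < n) t[k++] = x[i] <= x[j] ? x[i++] : x[j++];
//	  while (i < p) t[k++] = x[i++];
//	  while (j < n) t[k++] = x[j++];
//	  memcpy(x, t, (size_t)n * sizeof *t);
//	  free(t);
//	}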
.rodata.cst32
# .LC0-.LC3 are vpxor masks; complementing a dword lane order-reverses
# it under signed compares, which builds the bitonic merge directions
.LC0: .quad -1,0,-1,0 # complement dword lanes 0,1,4,5
.LC1: .quad 0,-1,-1,0 # complement dword lanes 2,3,4,5
.LC2: .quad -1,-1,0,0 # complement dword lanes 0,1,2,3
.LC3: .quad -4294967296,4294967295,-4294967296,4294967295 # lanes 1,2,5,6
.LC4: .quad 0x7fffffff7fffffff,0x7fffffff7fffffff # INT32_MAX in all lanes;
.quad 0x7fffffff7fffffff,0x7fffffff7fffffff # pads the scratch buffer