#include "libc/macros.h"
.source __FILE__
/ D.J. Bernstein's outrageously fast integer sorting algorithm.
/
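/ Sorts an array of 32-bit signed integers in-place, in ascending
/ order, using data-oblivious sorting networks. Arrays of eight or
/ fewer elements use branchless scalar compare-exchanges; power-of-two
/ lengths go straight to int32_sort_2power; other short arrays are
/ copied to a stack buffer, padded with INT32_MAX out to the next
/ power of two, and sorted there; anything longer sorts a power-of-two
/ prefix, recurses on the tail, and merges the two pieces.
/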
/ @param rdi points to an array of 32-bit signed integers
/ @param rsi is the number of elements in the array
/ @note public domain
/ @see en.wikipedia.org/wiki/Sorting_network
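/
/ Callers normally reach this routine through the generic djbsort()
/ dispatcher rather than invoking the $avx2 variant directly. A
/ minimal usage sketch, assuming the usual prototype
/ void djbsort(int32_t *, size_t):
/
/     int32_t a[4] = {9, -3, 7, 0};
/     djbsort(a, 4);                /* a becomes {-3, 0, 7, 9} */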
djbsort$avx2:
push %rbp
mov %rsp,%rbp
push %r15
push %r14
push %r13
mov %rsi,%r13
push %r12
mov %rdi,%r12
push %rbx
andq $-32,%rsp
sub $1056,%rsp
cmp $8,%rsi
jg .L148
jne .L149
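/ n == 8: a pass of adjacent compare-exchanges (branchless via cmov)
/ sinks the maximum into place, then control falls through the smaller
/ cases below, completing an unrolled bubble sort.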
mov (%rdi),%eax
mov 4(%rdi),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,(%rdi)
mov 8(%rdi),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,4(%rdi)
mov 12(%rdi),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,8(%rdi)
mov 16(%rdi),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,12(%rdi)
mov 20(%rdi),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,16(%rdi)
mov 24(%rdi),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,20(%rdi)
mov 28(%rdi),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,24(%rdi)
mov %edx,28(%rdi)
jmp .L150
.L149: cmp $7,%rsi
jne .L151
.L150: mov (%r12),%edx
mov 4(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,(%r12)
mov 8(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,4(%r12)
mov 12(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,8(%r12)
mov 16(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,12(%r12)
mov 20(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,16(%r12)
mov 24(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,20(%r12)
mov %edx,24(%r12)
jmp .L152
.L151: cmp $6,%rsi
jne .L153
.L152: mov (%r12),%eax
mov 4(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,(%r12)
mov 8(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,4(%r12)
mov 12(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,8(%r12)
mov 16(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,12(%r12)
mov 20(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,16(%r12)
mov %edx,20(%r12)
jmp .L154
.L153: cmp $5,%rsi
jne .L155
.L154: mov (%r12),%edx
mov 4(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,(%r12)
mov 8(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,4(%r12)
mov 12(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,8(%r12)
mov 16(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,12(%r12)
mov %edx,16(%r12)
jmp .L156
.L155: cmp $4,%rsi
jne .L157
.L156: mov (%r12),%eax
mov 4(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,(%r12)
mov 8(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,4(%r12)
mov 12(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,8(%r12)
mov %edx,12(%r12)
jmp .L158
.L157: cmp $3,%rsi
jne .L159
.L158: mov (%r12),%edx
mov 4(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,(%r12)
mov 8(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,4(%r12)
mov %edx,8(%r12)
jmp .L160
.L159: cmp $2,%rsi
jne .L147
.L160: mov (%r12),%edx
mov 4(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,(%r12)
mov %eax,4(%r12)
jmp .L147
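/ n > 8: if n is a power of two, sort it directly; otherwise grow rbx
/ to the smallest power of two, at least eight, with n <= 2*rbx.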
.L148: lea -1(%rsi),%rax
mov $8,%ebx
test %rsi,%rax
jne .L162
xor %edx,%edx
call int32_sort_2power
jmp .L147
.L162: mov %r13,%r14
sub %rbx,%r14
cmp %rbx,%r14
jle .L199
add %rbx,%rbx
jmp .L162
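/ Short arrays (rbx <= 128): copy into the stack buffer, pad the tail
/ with INT32_MAX out to 2*rbx elements, sort that power of two, and
/ copy the first n results back.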
.L199: cmp $128,%rbx
jg .L164
mov %rbx,%rax
mov %rbx,%rdx
vmovdqa .LC4(%rip),%ymm0
sar $3,%rax
sar $2,%rdx
.L165: cmp %rdx,%rax
jge .L200
mov %rax,%rcx
incq %rax
salq $5,%rcx
vmovdqa %ymm0,32(%rsp,%rcx)
jmp .L165
.L200: xor %eax,%eax
.L167: mov (%r12,%rax,4),%edx
mov %rax,%r14
mov %edx,32(%rsp,%rax,4)
lea 1(%rax),%rax
cmp %rax,%r13
jne .L167
lea (%rbx,%rbx),%rsi
xor %edx,%edx
lea 32(%rsp),%rdi
call int32_sort_2power
xor %eax,%eax
.L168: mov 32(%rsp,%rax,4),%ecx
mov %rax,%rdx
mov %ecx,(%r12,%rax,4)
incq %rax
cmp %rdx,%r14
jne .L168
jmp .L147
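/ Long arrays: sort the first rbx elements with the down flag set,
/ sort the tail ascending by recursion, leaving a bitonic sequence for
/ the merge loop below.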
.L164: mov %rbx,%rsi
mov %r12,%rdi
mov $1,%edx
call int32_sort_2power
lea (%r12,%rbx,4),%rdi
mov %r14,%rsi
call djbsort$avx2
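/ Bitonic merge: each pass runs three network stages via
/ int32_threestages, patches the ragged edges with minmax_vector, and
/ divides the stage distance rbx by eight, until the remaining
/ distance fits the in-register networks that follow.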
.L175: mov %rbx,%r14
mov %r13,%rsi
mov %r12,%rdi
sar $2,%r14
mov %r14,%rdx
call int32_threestages
lea 0(,%r14,4),%r10
mov %r13,%rdx
lea (%r10,%rax),%r11
sub %r10,%rdx
lea (%r12,%rax,4),%rdi
mov %rax,%r9
sub %rax,%rdx
lea (%r12,%r11,4),%rsi
call minmax_vector
lea (%r14,%r14),%rax
mov %rax,24(%rsp)
cmp %r13,%r11
jg .L169
imul $-8,%r14,%rax
lea (%r12,%r10),%rdx
lea (%rdx,%r10),%rcx
lea (%r14,%r9),%r15
lea (%rcx,%r10),%rdi
add %rdi,%rax
lea (%rax,%r10),%rsi
lea (%rsi,%r10),%r8
.L170: cmp %r9,%r15
jle .L201
vmovdqu (%rcx,%r9,4),%ymm7
vmovdqu (%rdi,%r9,4),%ymm6
vpminsd (%r12,%r9,4),%ymm7,%ymm2
vpminsd (%rdx,%r9,4),%ymm6,%ymm3
vpmaxsd (%r12,%r9,4),%ymm7,%ymm0
vpmaxsd (%rdx,%r9,4),%ymm6,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,(%r12,%r9,4)
vmovdqu %ymm2,(%rax,%r9,4)
vmovdqu %ymm3,(%rsi,%r9,4)
vmovdqu %ymm0,(%r8,%r9,4)
add $8,%r9
jmp .L170
.L201: mov %r11,%r9
.L169: mov 24(%rsp),%rax
lea (%r14,%r14),%r15
mov %r13,%r11
lea (%r12,%r9,4),%rdi
sub %r15,%r11
add %r9,%rax
mov %r11,%rdx
lea (%r12,%rax,4),%rsi
sub %r9,%rdx
call minmax_vector
lea (%r15,%r9),%rax
cmp %r13,%rax
jg .L172
mov %rax,%rdx
add %r12,%r10
sub %r14,%rdx
.L173: cmp %r9,%rdx
jle .L202
vmovdqu (%r10,%r9,4),%ymm6
vpminsd (%r12,%r9,4),%ymm6,%ymm1
vpmaxsd (%r12,%r9,4),%ymm6,%ymm0
vmovdqu %ymm1,(%r12,%r9,4)
vmovdqu %ymm0,(%r10,%r9,4)
add $8,%r9
jmp .L173
.L202: mov %rax,%r9
.L172: lea (%r11,%r14),%rdx
add %r9,%r14
lea (%r12,%r9,4),%rdi
sar $3,%rbx
sub %r9,%rdx
lea (%r12,%r14,4),%rsi
call minmax_vector
cmp $63,%rbx
jg .L175
cmp $32,%rbx
jne .L176
mov %r12,%rax
mov $63,%edx
.L177: cmp %r13,%rdx
jge .L203
vmovdqu (%rax),%ymm6
add $64,%rdx
add $256,%rax
vpminsd -128(%rax),%ymm6,%ymm10
vpmaxsd -128(%rax),%ymm6,%ymm8
vmovdqu -224(%rax),%ymm6
vpminsd -96(%rax),%ymm6,%ymm3
vpmaxsd -96(%rax),%ymm6,%ymm0
vmovdqu -192(%rax),%ymm6
vpminsd -64(%rax),%ymm6,%ymm2
vpmaxsd -64(%rax),%ymm6,%ymm1
vmovdqu -160(%rax),%ymm6
vpmaxsd -32(%rax),%ymm6,%ymm4
vpminsd -32(%rax),%ymm6,%ymm13
vpminsd %ymm2,%ymm10,%ymm15
vpminsd %ymm1,%ymm8,%ymm12
vpminsd %ymm13,%ymm3,%ymm11
vpminsd %ymm4,%ymm0,%ymm5
vpmaxsd %ymm1,%ymm8,%ymm1
vpmaxsd %ymm2,%ymm10,%ymm2
vpmaxsd %ymm13,%ymm3,%ymm13
vpmaxsd %ymm4,%ymm0,%ymm0
vpminsd %ymm13,%ymm2,%ymm10
vpminsd %ymm0,%ymm1,%ymm4
vpminsd %ymm5,%ymm12,%ymm9
vpminsd %ymm11,%ymm15,%ymm14
vpmaxsd %ymm13,%ymm2,%ymm13
vpmaxsd %ymm0,%ymm1,%ymm0
vpmaxsd %ymm11,%ymm15,%ymm15
vpmaxsd %ymm5,%ymm12,%ymm12
vperm2i128 $32,%ymm13,%ymm10,%ymm6
vperm2i128 $32,%ymm12,%ymm9,%ymm5
vperm2i128 $32,%ymm0,%ymm4,%ymm8
vperm2i128 $32,%ymm15,%ymm14,%ymm11
vperm2i128 $49,%ymm0,%ymm4,%ymm0
vperm2i128 $49,%ymm12,%ymm9,%ymm12
vperm2i128 $49,%ymm15,%ymm14,%ymm14
vperm2i128 $49,%ymm13,%ymm10,%ymm13
vpminsd %ymm14,%ymm11,%ymm3
vpminsd %ymm12,%ymm5,%ymm1
vpminsd %ymm13,%ymm6,%ymm2
vpmaxsd %ymm12,%ymm5,%ymm9
vpmaxsd %ymm14,%ymm11,%ymm11
vpminsd %ymm0,%ymm8,%ymm12
vperm2i128 $32,%ymm9,%ymm1,%ymm5
vpmaxsd %ymm0,%ymm8,%ymm8
vpmaxsd %ymm13,%ymm6,%ymm10
vperm2i128 $32,%ymm11,%ymm3,%ymm7
vperm2i128 $32,%ymm10,%ymm2,%ymm6
vperm2i128 $49,%ymm11,%ymm3,%ymm11
vperm2i128 $49,%ymm10,%ymm2,%ymm10
vperm2i128 $49,%ymm9,%ymm1,%ymm9
vperm2i128 $32,%ymm8,%ymm12,%ymm4
vperm2i128 $49,%ymm8,%ymm12,%ymm8
vpunpcklqdq %ymm11,%ymm7,%ymm3
vpunpcklqdq %ymm10,%ymm6,%ymm2
vpunpcklqdq %ymm9,%ymm5,%ymm1
vpunpcklqdq %ymm8,%ymm4,%ymm0
vpunpckhqdq %ymm11,%ymm7,%ymm7
vpunpckhqdq %ymm10,%ymm6,%ymm6
vpunpckhqdq %ymm9,%ymm5,%ymm5
vpunpckhqdq %ymm8,%ymm4,%ymm4
vpminsd %ymm3,%ymm7,%ymm11
vpminsd %ymm2,%ymm6,%ymm10
vpminsd %ymm1,%ymm5,%ymm9
vpminsd %ymm0,%ymm4,%ymm8
vpmaxsd %ymm3,%ymm7,%ymm7
vpmaxsd %ymm2,%ymm6,%ymm6
vpmaxsd %ymm1,%ymm5,%ymm5
vpunpckldq %ymm7,%ymm11,%ymm3
vpmaxsd %ymm0,%ymm4,%ymm4
vpunpckhdq %ymm7,%ymm11,%ymm7
vpunpckldq %ymm6,%ymm10,%ymm2
vpunpckldq %ymm5,%ymm9,%ymm1
vpunpckhdq %ymm6,%ymm10,%ymm6
vpunpckhdq %ymm5,%ymm9,%ymm5
vpunpckldq %ymm4,%ymm8,%ymm0
vpunpckhdq %ymm4,%ymm8,%ymm4
vpunpcklqdq %ymm7,%ymm3,%ymm10
vpunpcklqdq %ymm5,%ymm1,%ymm8
vpunpckhqdq %ymm7,%ymm3,%ymm3
vpunpcklqdq %ymm6,%ymm2,%ymm9
vpunpcklqdq %ymm4,%ymm0,%ymm7
vpunpckhqdq %ymm6,%ymm2,%ymm2
vpunpckhqdq %ymm5,%ymm1,%ymm1
vpunpckhqdq %ymm4,%ymm0,%ymm0
vpminsd %ymm8,%ymm1,%ymm5
vpminsd %ymm9,%ymm2,%ymm6
vpminsd %ymm7,%ymm0,%ymm4
vpminsd %ymm10,%ymm3,%ymm11
vpmaxsd %ymm8,%ymm1,%ymm1
vpmaxsd %ymm7,%ymm0,%ymm0
vpmaxsd %ymm10,%ymm3,%ymm3
vpmaxsd %ymm9,%ymm2,%ymm2
vpunpckldq %ymm2,%ymm6,%ymm7
vpunpckldq %ymm3,%ymm11,%ymm8
vpunpckhdq %ymm2,%ymm6,%ymm2
vpunpckhdq %ymm3,%ymm11,%ymm3
vpunpckldq %ymm1,%ymm5,%ymm6
vpunpckhdq %ymm1,%ymm5,%ymm1
vmovdqu %ymm8,-256(%rax)
vpunpckldq %ymm0,%ymm4,%ymm5
vpunpckhdq %ymm0,%ymm4,%ymm0
vmovdqu %ymm3,-224(%rax)
vmovdqu %ymm7,-192(%rax)
vmovdqu %ymm2,-160(%rax)
vmovdqu %ymm6,-128(%rax)
vmovdqu %ymm1,-96(%rax)
vmovdqu %ymm5,-64(%rax)
vmovdqu %ymm0,-32(%rax)
jmp .L177
.L203: mov %r13,%rdi
mov %r13,%r9
lea -32(%r13),%rdx
shr $6,%rdi
andq $-64,%r9
salq $8,%rdi
sub %r9,%rdx
lea 128(%r12,%rdi),%rsi
add %r12,%rdi
call minmax_vector
jmp .L180
.L176: xor %r10d,%r10d
cmp $16,%rbx
jne .L181
xor %r9d,%r9d
.L180: lea 31(%r9),%rax
.L179: cmp %r13,%rax
jge .L204
vmovdqu -124(%r12,%rax,4),%ymm6
vpminsd -60(%r12,%rax,4),%ymm6,%ymm5
vpmaxsd -60(%r12,%rax,4),%ymm6,%ymm0
vmovdqu -92(%r12,%rax,4),%ymm6
vpminsd -28(%r12,%rax,4),%ymm6,%ymm1
vpmaxsd -28(%r12,%rax,4),%ymm6,%ymm2
vpminsd %ymm1,%ymm5,%ymm3
vpminsd %ymm2,%ymm0,%ymm4
vpmaxsd %ymm1,%ymm5,%ymm5
vpmaxsd %ymm2,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm4,%ymm2
vperm2i128 $32,%ymm5,%ymm3,%ymm1
vperm2i128 $49,%ymm0,%ymm4,%ymm0
vperm2i128 $49,%ymm5,%ymm3,%ymm3
vpminsd %ymm0,%ymm2,%ymm4
vpmaxsd %ymm0,%ymm2,%ymm0
vpminsd %ymm3,%ymm1,%ymm5
vpmaxsd %ymm3,%ymm1,%ymm1
vperm2i128 $32,%ymm0,%ymm4,%ymm2
vperm2i128 $32,%ymm1,%ymm5,%ymm3
vperm2i128 $49,%ymm0,%ymm4,%ymm4
vperm2i128 $49,%ymm1,%ymm5,%ymm5
vpunpcklqdq %ymm5,%ymm3,%ymm1
vpunpcklqdq %ymm4,%ymm2,%ymm0
vpunpckhqdq %ymm5,%ymm3,%ymm3
vpunpckhqdq %ymm4,%ymm2,%ymm2
vpminsd %ymm3,%ymm1,%ymm5
vpmaxsd %ymm3,%ymm1,%ymm1
vpminsd %ymm2,%ymm0,%ymm4
vpmaxsd %ymm2,%ymm0,%ymm0
vpunpckldq %ymm1,%ymm5,%ymm3
vpunpckldq %ymm0,%ymm4,%ymm2
vpunpckhdq %ymm1,%ymm5,%ymm5
vpunpckhdq %ymm0,%ymm4,%ymm4
vpunpcklqdq %ymm5,%ymm3,%ymm1
vpunpcklqdq %ymm4,%ymm2,%ymm0
vpunpckhqdq %ymm5,%ymm3,%ymm3
vpunpckhqdq %ymm4,%ymm2,%ymm2
vpminsd %ymm3,%ymm1,%ymm4
vpmaxsd %ymm3,%ymm1,%ymm1
vpminsd %ymm2,%ymm0,%ymm3
vpmaxsd %ymm2,%ymm0,%ymm0
vpunpckldq %ymm1,%ymm4,%ymm5
vpunpckldq %ymm0,%ymm3,%ymm2
vpunpckhdq %ymm1,%ymm4,%ymm1
vpunpckhdq %ymm0,%ymm3,%ymm0
vmovdqu %ymm5,-124(%r12,%rax,4)
vmovdqu %ymm1,-92(%r12,%rax,4)
vmovdqu %ymm2,-60(%r12,%rax,4)
vmovdqu %ymm0,-28(%r12,%rax,4)
add $32,%rax
jmp .L179
.L204: mov %r13,%r10
xor %edx,%edx
lea 0(,%r9,4),%rax
sub %r9,%r10
mov %r10,%rdi
andq $-32,%r10
shr $5,%rdi
cmp %r9,%r13
cmovl %rdx,%r10
salq $7,%rdi
add %r9,%r10
cmp %r9,%r13
cmovl %rdx,%rdi
lea -16(%r13),%rdx
sub %r10,%rdx
lea 64(%rax,%rdi),%rsi
add %rax,%rdi
add %r12,%rsi
add %r12,%rdi
call minmax_vector
.L181: lea 15(%r10),%rax
.L183: cmp %r13,%rax
jge .L205
vmovdqu -60(%r12,%rax,4),%ymm6
vpmaxsd -28(%r12,%rax,4),%ymm6,%ymm2
vpminsd -28(%r12,%rax,4),%ymm6,%ymm1
vperm2i128 $32,%ymm2,%ymm1,%ymm0
vperm2i128 $49,%ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm2,%ymm1
vperm2i128 $49,%ymm0,%ymm2,%ymm2
vpunpcklqdq %ymm2,%ymm1,%ymm0
vpunpckhqdq %ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm2,%ymm1
vpunpckhdq %ymm0,%ymm2,%ymm2
vpunpcklqdq %ymm2,%ymm1,%ymm0
vpunpckhqdq %ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm2,%ymm1
vpunpckhdq %ymm0,%ymm2,%ymm0
vmovdqu %ymm1,-60(%r12,%rax,4)
vmovdqu %ymm0,-28(%r12,%rax,4)
add $16,%rax
jmp .L183
.L205: mov %r13,%r9
xor %edx,%edx
lea 0(,%r10,4),%rcx
sub %r10,%r9
mov %r9,%rax
andq $-16,%r9
shr $4,%rax
cmp %r10,%r13
cmovl %rdx,%r9
salq $6,%rax
add %r10,%r9
cmp %r10,%r13
cmovl %rdx,%rax
lea -8(%r13),%rdx
sub %r9,%rdx
lea (%rax,%rcx),%r10
lea 32(%rcx,%rax),%rsi
add %r12,%r10
add %r12,%rsi
mov %r10,%rdi
call minmax_vector
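/ Scalar cleanup: whatever the vector passes leave over is finished
/ with branchless cmov compare-exchange networks on the last few
/ elements.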
lea 7(%r9),%rax
cmp %r13,%rax
jge .L185
lea 16(,%r9,4),%rax
mov (%r10),%ecx
add $8,%r9
lea -12(%r12,%rax),%r14
lea (%r12,%rax),%rbx
lea 4(%r12,%rax),%r11
mov (%rbx),%edx
lea 8(%r12,%rax),%r8
cmp %edx,%ecx
mov %ecx,%esi
cmovg %edx,%ecx
cmovg %esi,%edx
mov %ecx,(%r10)
mov %edx,(%rbx)
mov (%r14),%ecx
mov (%r11),%edx
cmp %edx,%ecx
mov %ecx,%esi
cmovg %edx,%ecx
cmovg %esi,%edx
lea -8(%r12,%rax),%rsi
mov %ecx,(%r14)
mov %edx,(%r11)
mov (%rsi),%ecx
mov (%r8),%edx
cmp %edx,%ecx
mov %ecx,%edi
cmovg %edx,%ecx
cmovg %edi,%edx
lea 12(%r12,%rax),%rdi
mov %ecx,(%rsi)
lea -4(%r12,%rax),%rcx
mov %edx,(%r8)
mov (%rcx),%edx
mov (%rdi),%eax
cmp %eax,%edx
mov %edx,%r15d
cmovg %eax,%edx
cmovg %r15d,%eax
mov %edx,(%rcx)
mov %eax,(%rdi)
mov (%r10),%edx
mov (%rsi),%eax
cmp %eax,%edx
mov %edx,%r15d
cmovg %eax,%edx
cmovg %r15d,%eax
mov %edx,(%r10)
mov %eax,(%rsi)
mov (%rcx),%eax
mov (%r14),%edx
cmp %eax,%edx
mov %edx,%r15d
cmovg %eax,%edx
cmovg %r15d,%eax
mov %edx,(%r14)
mov %eax,(%rcx)
mov (%r10),%edx
mov (%r14),%eax
cmp %eax,%edx
mov %edx,%r15d
cmovg %eax,%edx
cmovg %r15d,%eax
mov %edx,(%r10)
mov %eax,(%r14)
mov (%rsi),%edx
mov (%rcx),%eax
cmp %eax,%edx
mov %edx,%r10d
cmovg %eax,%edx
cmovg %r10d,%eax
mov %edx,(%rsi)
mov %eax,(%rcx)
mov (%rbx),%edx
mov (%r8),%esi
mov (%rdi),%ecx
cmp %esi,%edx
mov %edx,%eax
cmovg %esi,%edx
cmovg %eax,%esi
mov (%r11),%eax
cmp %ecx,%eax
mov %eax,%r10d
cmovg %ecx,%eax
cmovg %r10d,%ecx
cmp %eax,%edx
mov %edx,%r10d
cmovg %eax,%edx
cmovg %r10d,%eax
mov %edx,(%rbx)
mov %esi,%edx
mov %eax,(%r11)
mov %ecx,%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,(%r8)
mov %eax,(%rdi)
.L185: lea 4(%r9),%r10
lea -4(%r13),%rdx
lea 0(,%r10,4),%rbx
sub %r9,%rdx
lea -16(%r12,%rbx),%r11
lea (%r12,%rbx),%rsi
mov %r11,%rdi
call minmax_vector
lea 3(%r9),%rax
cmp %r13,%rax
jge .L186
lea -8(%r12,%rbx),%rcx
mov (%r11),%edx
lea -12(%r12,%rbx),%rdi
mov %r10,%r9
mov (%rcx),%eax
cmp %eax,%edx
mov %edx,%esi
cmovg %eax,%edx
cmovg %esi,%eax
lea -4(%r12,%rbx),%rsi
mov %edx,(%r11)
mov %eax,(%rcx)
mov (%rdi),%edx
mov (%rsi),%eax
cmp %eax,%edx
mov %edx,%r8d
cmovg %eax,%edx
cmovg %r8d,%eax
mov %edx,(%rdi)
mov %eax,(%rsi)
mov (%rdi),%eax
mov (%r11),%edx
cmp %eax,%edx
mov %edx,%r8d
cmovg %eax,%edx
cmovg %r8d,%eax
mov %edx,(%r11)
mov %eax,(%rdi)
mov (%rcx),%edx
mov (%rsi),%eax
cmp %eax,%edx
mov %edx,%edi
cmovg %eax,%edx
cmovg %edi,%eax
mov %edx,(%rcx)
mov %eax,(%rsi)
.L186: lea 2(%r9),%rax
cmp %r13,%rax
jge .L187
lea 0(,%r9,4),%rax
lea (%r12,%rax),%rsi
lea 8(%r12,%rax),%rcx
mov (%rsi),%edx
mov (%rcx),%eax
cmp %eax,%edx
mov %edx,%edi
cmovg %eax,%edx
cmovg %edi,%eax
mov %edx,(%rsi)
mov %eax,(%rcx)
.L187: lea 1(%r9),%rax
cmp %r13,%rax
jge .L147
salq $2,%r9
lea (%r12,%r9),%rsi
lea 4(%r12,%r9),%rcx
mov (%rsi),%edx
mov (%rcx),%eax
cmp %eax,%edx
mov %edx,%edi
cmovg %eax,%edx
cmovg %edi,%eax
mov %edx,(%rsi)
mov %eax,(%rcx)
.L147: lea -40(%rbp),%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.endfn djbsort$avx2,globl,hidden
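/ Compare-exchanges two equal-length runs of 32-bit integers, leaving
/ the element-wise minima in the first run and the maxima in the
/ second.
/
/ @param rdi is the array receiving the minima
/ @param rsi is the array receiving the maxima
/ @param rdx is the element count; counts below eight use a scalar
/     loop, and counts that are not a multiple of eight are handled
/     by one overlapping vector at the tail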
minmax_vector:
cmp $7,%rdx
jg .L13
.L2: test %rdx,%rdx
jle .L15
mov (%rdi),%ecx
mov (%rsi),%eax
add $4,%rdi
add $4,%rsi
cmp %eax,%ecx
mov %ecx,%r8d
cmovg %eax,%ecx
cmovg %r8d,%eax
decq %rdx
mov %ecx,-4(%rdi)
mov %eax,-4(%rsi)
jmp .L2
.L15: ret
.L13: testb $7,%dl
je .L6
lea -32(,%rdx,4),%rax
andq $-8,%rdx
lea (%rdi,%rax),%rcx
add %rsi,%rax
vmovdqu (%rax),%ymm2
vpminsd (%rcx),%ymm2,%ymm1
vpmaxsd (%rcx),%ymm2,%ymm0
vmovdqu %ymm1,(%rcx)
vmovdqu %ymm0,(%rax)
.L6: xor %eax,%eax
.L7: vmovdqu (%rdi,%rax),%ymm4
vpminsd (%rsi,%rax),%ymm4,%ymm1
vpmaxsd (%rsi,%rax),%ymm4,%ymm0
vmovdqu %ymm1,(%rdi,%rax)
vmovdqu %ymm0,(%rsi,%rax)
add $32,%rax
sub $8,%rdx
jne .L7
ret
.endfn minmax_vector
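/ Runs two merging stages at once, compare-exchanging elements at
/ distances of 64 and 32 ints within each 128-element block.
/
/ @param rdi is the int32 array
/ @param rsi is the element count, a multiple of 128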
int32_twostages_32:
sub $-128,%rdi
.L17: lea -128(%rdi),%rax
test %rsi,%rsi
jle .L21
.L18: vmovdqu (%rax),%ymm5
vmovdqu 128(%rax),%ymm7
add $32,%rax
vpminsd 352(%rax),%ymm7,%ymm3
vpminsd 224(%rax),%ymm5,%ymm2
vpmaxsd 224(%rax),%ymm5,%ymm0
vpmaxsd 352(%rax),%ymm7,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,-32(%rax)
vmovdqu %ymm2,96(%rax)
vmovdqu %ymm3,224(%rax)
vmovdqu %ymm0,352(%rax)
cmp %rax,%rdi
jne .L18
add $-128,%rsi
add $512,%rdi
jmp .L17
.L21: ret
.endfn int32_twostages_32
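/ Runs three merging stages in one pass, compare-exchanging elements
/ at distances of 4*rdx, 2*rdx, and rdx ints.
/
/ @param rdi is the int32 array
/ @param rsi is the element count
/ @param rdx is the smallest of the three stage distances, in ints
/ @return rax is the index at which the sweep stopped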
int32_threestages:
push %rbp
imul $-24,%rdx,%r8
lea 0(,%rdx,8),%rax
mov %rsp,%rbp
push %r15
push %r14
push %r13
push %r12
push %rbx
andq $-32,%rsp
sub $64,%rsp
mov %rax,56(%rsp)
lea 0(,%rdx,4),%rax
lea (%rdi,%rax),%rcx
mov %rsi,8(%rsp)
lea (%rcx,%rax),%rsi
lea (%rsi,%rax),%r9
lea (%r9,%rax),%r11
lea (%r11,%rax),%r12
lea (%r12,%rax),%r14
lea (%r14,%rax),%r15
lea (%r15,%r8),%rbx
mov %rbx,40(%rsp)
add %rax,%rbx
lea (%rbx,%rax),%r10
mov %rbx,32(%rsp)
lea (%r10,%rax),%rbx
lea (%rbx,%rax),%r13
lea 0(%r13,%rax),%r8
mov %r8,24(%rsp)
add %r8,%rax
mov %rax,16(%rsp)
xor %eax,%eax
.L23: mov 56(%rsp),%r8
add %rax,%r8
mov %r8,48(%rsp)
cmp 8(%rsp),%r8
jg .L28
.L25: cmp %rdx,%rax
jge .L29
vmovdqu (%rdi,%rax,4),%ymm3
vmovdqu (%rsi,%rax,4),%ymm6
vpminsd (%r11,%rax,4),%ymm3,%ymm7
vpmaxsd (%r11,%rax,4),%ymm3,%ymm4
vpmaxsd (%r14,%rax,4),%ymm6,%ymm0
vmovdqu (%rcx,%rax,4),%ymm3
vmovdqu (%rsi,%rax,4),%ymm5
vpminsd (%r12,%rax,4),%ymm3,%ymm2
vpmaxsd (%r12,%rax,4),%ymm3,%ymm1
vpminsd (%r14,%rax,4),%ymm5,%ymm5
vmovdqu (%r9,%rax,4),%ymm3
vpminsd (%r15,%rax,4),%ymm3,%ymm6
vpmaxsd (%r15,%rax,4),%ymm3,%ymm3
vpminsd %ymm5,%ymm7,%ymm8
mov 40(%rsp),%r8
vpmaxsd %ymm5,%ymm7,%ymm5
vpminsd %ymm6,%ymm2,%ymm7
vpminsd %ymm7,%ymm8,%ymm9
vpmaxsd %ymm6,%ymm2,%ymm2
vpminsd %ymm0,%ymm4,%ymm6
vpmaxsd %ymm0,%ymm4,%ymm0
vmovdqu %ymm9,(%rdi,%rax,4)
vpminsd %ymm3,%ymm1,%ymm4
vpmaxsd %ymm3,%ymm1,%ymm1
vpmaxsd %ymm7,%ymm8,%ymm3
vpminsd %ymm2,%ymm5,%ymm7
vmovdqu %ymm3,(%r8,%rax,4)
mov 32(%rsp),%r8
vpmaxsd %ymm2,%ymm5,%ymm2
vpminsd %ymm4,%ymm6,%ymm5
vpmaxsd %ymm4,%ymm6,%ymm6
vpminsd %ymm1,%ymm0,%ymm4
vmovdqu %ymm7,(%r8,%rax,4)
mov 24(%rsp),%r8
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm2,(%r10,%rax,4)
vmovdqu %ymm5,(%rbx,%rax,4)
vmovdqu %ymm6,0(%r13,%rax,4)
vmovdqu %ymm4,(%r8,%rax,4)
mov 16(%rsp),%r8
vmovdqu %ymm0,(%r8,%rax,4)
add $8,%rax
jmp .L25
.L29: mov 48(%rsp),%rax
add 56(%rsp),%rdx
jmp .L23
.L28: lea -40(%rbp),%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.endfn int32_threestages
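/ Finishes merging two 8-int vectors (ymm0 and ymm1) that together
/ form a bitonic sequence, storing 16 sorted ints.
/
/ @param rdi receives the 16 sorted ints
/ @param esi if nonzero complements the results on the way out, which
/     is how descending order is produced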
merge16_finish:
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm3,%ymm2
vperm2i128 $49,%ymm0,%ymm3,%ymm0
vpminsd %ymm0,%ymm2,%ymm1
vpmaxsd %ymm0,%ymm2,%ymm0
vpunpcklqdq %ymm0,%ymm1,%ymm2
vpunpckhqdq %ymm0,%ymm1,%ymm0
vpminsd %ymm0,%ymm2,%ymm1
vpmaxsd %ymm0,%ymm2,%ymm2
vpunpckldq %ymm2,%ymm1,%ymm0
vpunpckhdq %ymm2,%ymm1,%ymm1
vpunpcklqdq %ymm1,%ymm0,%ymm3
vpunpckhqdq %ymm1,%ymm0,%ymm0
vpminsd %ymm3,%ymm0,%ymm2
vpmaxsd %ymm3,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm2,%ymm1
vpunpckhdq %ymm0,%ymm2,%ymm0
vperm2i128 $32,%ymm0,%ymm1,%ymm2
vperm2i128 $49,%ymm0,%ymm1,%ymm0
test %esi,%esi
je .L31
vpcmpeqd %ymm1,%ymm1,%ymm1
vpxor %ymm1,%ymm2,%ymm2
vpxor %ymm1,%ymm0,%ymm0
.L31: vmovdqu %ymm2,(%rdi)
vmovdqu %ymm0,32(%rdi)
ret
.endfn merge16_finish
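/ Sorts an int32 array whose length is a power of two.
/
/ @param rdi is the int32 array
/ @param rsi is the element count, a power of two no less than 8
/ @param edx is nonzero to sort in descending order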
int32_sort_2power:
push %r13
lea 16(%rsp),%r13
andq $-32,%rsp
push -8(%r13)
push %rbp
mov %rsp,%rbp
push %r15
push %r14
push %r13
push %r12
mov %rdi,%r12
push %rbx
sub $264,%rsp
mov %edx,-116(%rbp)
cmp $8,%rsi
jne .L36
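/ n == 8: fully unrolled sorting network on eight scalar registers,
/ branchless via cmov.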
mov 4(%rdi),%edx
mov (%rdi),%r8d
mov 8(%rdi),%ecx
mov 28(%r12),%r9d
cmp %r8d,%edx
mov %edx,%eax
cmovg %r8d,%edx
cmovg %eax,%r8d
mov 12(%rdi),%eax
cmp %ecx,%eax
mov %eax,%esi
cmovg %ecx,%eax
cmovg %esi,%ecx
cmp %r8d,%ecx
mov %ecx,%esi
cmovg %r8d,%ecx
cmovg %esi,%r8d
cmp %edx,%eax
mov %eax,%esi
cmovg %edx,%eax
cmovg %esi,%edx
mov 20(%rdi),%esi
mov %edx,%r10d
mov 16(%rdi),%edi
cmp %r10d,%ecx
mov %ecx,%edx
cmovg %r10d,%ecx
cmovg %edx,%r10d
cmp %edi,%esi
mov %esi,%edx
cmovg %edi,%esi
cmovg %edx,%edi
mov 24(%r12),%edx
cmp %edx,%r9d
mov %r9d,%r11d
cmovg %edx,%r9d
cmovg %r11d,%edx
cmp %edi,%edx
mov %edx,%r11d
cmovg %edi,%edx
cmovg %r11d,%edi
cmp %esi,%r9d
mov %r9d,%r11d
cmovg %esi,%r9d
cmovg %r11d,%esi
cmp %esi,%edx
mov %edx,%r11d
cmovg %esi,%edx
cmovg %r11d,%esi
cmp %r8d,%edi
mov %edi,%r11d
cmovg %r8d,%edi
cmovg %r11d,%r8d
cmp %ecx,%edx
mov %edx,%r11d
cmovg %ecx,%edx
cmovg %r11d,%ecx
mov %r8d,(%r12)
cmp %ecx,%edi
mov %edi,%r11d
cmovg %ecx,%edi
cmovg %r11d,%ecx
cmp %r10d,%esi
mov %esi,%r11d
cmovg %r10d,%esi
cmovg %r11d,%r10d
cmp %eax,%r9d
mov %r9d,%r11d
cmovg %eax,%r9d
cmovg %r11d,%eax
cmp %eax,%esi
mov %esi,%r11d
cmovg %eax,%esi
cmovg %r11d,%eax
mov %r9d,28(%r12)
cmp %r10d,%ecx
mov %ecx,%r11d
cmovg %r10d,%ecx
cmovg %r11d,%r10d
cmp %eax,%edi
mov %edi,%r11d
cmovg %eax,%edi
cmovg %r11d,%eax
mov %r10d,4(%r12)
cmp %esi,%edx
mov %edx,%r11d
cmovg %esi,%edx
cmovg %r11d,%esi
mov %ecx,8(%r12)
mov %eax,12(%r12)
mov %edi,16(%r12)
mov %esi,20(%r12)
mov %edx,24(%r12)
jmp .L35
.L36: mov %rsi,%r15
cmp $16,%rsi
jne .L38
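/ n == 16: bitonic network on two vectors; the .LC0 and .LC1 masks
/ complement selected lanes so a single vpminsd/vpmaxsd pair performs
/ compare-exchanges in both directions at once.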
vmovdqa .LC0(%rip),%ymm0
vpxor 32(%rdi),%ymm0,%ymm2
vpxor (%rdi),%ymm0,%ymm0
vmovdqa .LC1(%rip),%ymm4
cmp $0,-116(%rbp)
vpunpckldq %ymm2,%ymm0,%ymm1
vpunpckhdq %ymm2,%ymm0,%ymm0
vpunpcklqdq %ymm0,%ymm1,%ymm3
vpunpckhqdq %ymm0,%ymm1,%ymm1
vpminsd %ymm3,%ymm1,%ymm2
vpmaxsd %ymm3,%ymm1,%ymm1
vpxor %ymm4,%ymm2,%ymm2
vpxor %ymm4,%ymm1,%ymm1
vpunpckldq %ymm1,%ymm2,%ymm0
vpunpckhdq %ymm1,%ymm2,%ymm1
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm1
vpunpcklqdq %ymm1,%ymm3,%ymm2
vpunpckhqdq %ymm1,%ymm3,%ymm3
vpunpckldq %ymm3,%ymm2,%ymm1
vpunpckhdq %ymm3,%ymm2,%ymm2
vpunpcklqdq %ymm2,%ymm1,%ymm0
vpunpckhqdq %ymm2,%ymm1,%ymm1
vpminsd %ymm0,%ymm1,%ymm2
vpmaxsd %ymm0,%ymm1,%ymm1
vpunpckldq %ymm1,%ymm2,%ymm0
vpunpckhdq %ymm1,%ymm2,%ymm1
vpxor %ymm4,%ymm1,%ymm1
vpxor %ymm4,%ymm0,%ymm0
vperm2i128 $32,%ymm1,%ymm0,%ymm3
vperm2i128 $49,%ymm1,%ymm0,%ymm0
vpminsd %ymm3,%ymm0,%ymm2
vpmaxsd %ymm3,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm2,%ymm1
vperm2i128 $49,%ymm0,%ymm2,%ymm0
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm2
vpunpcklqdq %ymm2,%ymm3,%ymm1
vpunpckhqdq %ymm2,%ymm3,%ymm2
vpunpckldq %ymm2,%ymm1,%ymm0
vpunpckhdq %ymm2,%ymm1,%ymm2
vpunpcklqdq %ymm2,%ymm0,%ymm1
vpunpckhqdq %ymm2,%ymm0,%ymm0
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm2,%ymm1
vpunpckhdq %ymm0,%ymm2,%ymm0
vpunpcklqdq %ymm0,%ymm1,%ymm2
vpunpckhqdq %ymm0,%ymm1,%ymm1
vpcmpeqd %ymm0,%ymm0,%ymm0
je .L39
vpxor %ymm0,%ymm1,%ymm1
jmp .L40
.L39: vpxor %ymm0,%ymm2,%ymm2
.L40: mov -116(%rbp),%esi
vmovdqa %ymm2,%ymm0
mov %r12,%rdi
jmp .L134
.L38: cmp $32,%rsi
jne .L41
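/ n == 32: sort the two 16-element halves in opposite directions,
/ compare-exchange across them, then let merge16_finish complete the
/ bitonic merge of each half.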
mov $1,%edx
mov $16,%esi
lea 64(%r12),%r13
call int32_sort_2power
xor %edx,%edx
mov $16,%esi
mov %r13,%rdi
call int32_sort_2power
cmp $0,-116(%rbp)
vmovdqu (%r12),%ymm4
vmovdqu 32(%r12),%ymm1
vmovdqu 64(%r12),%ymm2
vmovdqu 96(%r12),%ymm3
je .L42
vpcmpeqd %ymm0,%ymm0,%ymm0
vpxor %ymm0,%ymm4,%ymm4
vpxor %ymm0,%ymm1,%ymm1
vpxor %ymm0,%ymm2,%ymm2
vpxor %ymm0,%ymm3,%ymm3
.L42: mov -116(%rbp),%esi
vpmaxsd %ymm1,%ymm3,%ymm5
vpminsd %ymm4,%ymm2,%ymm0
mov %r12,%rdi
vpmaxsd %ymm4,%ymm2,%ymm4
vpminsd %ymm1,%ymm3,%ymm1
vmovdqa %ymm5,-80(%rbp)
vmovdqa %ymm4,-112(%rbp)
call merge16_finish
vmovdqa -80(%rbp),%ymm5
mov -116(%rbp),%esi
mov %r13,%rdi
vmovdqa -112(%rbp),%ymm4
vmovdqa %ymm5,%ymm1
vmovdqa %ymm4,%ymm0
.L134: add $264,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
lea -16(%r13),%rsp
pop %r13
jmp merge16_finish
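/ n >= 64: the general path treats the array as eight contiguous
/ blocks of n/8 ints, running the merging network over whole vectors
/ and using the .LC masks to complement whichever blocks must sort
/ downward.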
.L41: mov %rsi,%rax
sar $3,%rax
mov %rax,-80(%rbp)
lea 0(,%rax,4),%r13
salq $3,%rax
imul $-20,-80(%rbp),%rdx
lea (%rdi,%rax),%rdi
lea (%rdi,%rax),%rsi
lea (%rsi,%rax),%rcx
add %rcx,%rdx
lea (%rdx,%rax),%r9
lea (%r9,%rax),%r8
add %r8,%rax
mov %rax,-136(%rbp)
mov %rax,%r10
xor %eax,%eax
.L43: cmp -80(%rbp),%rax
jge .L135
add $32,%rdi
add $32,%rsi
add $32,%rcx
add $32,%rdx
vmovdqu (%r12,%rax,4),%ymm5
add $32,%r9
add $32,%r8
add $32,%r10
vpminsd -32(%rsi),%ymm5,%ymm4
vpmaxsd -32(%rsi),%ymm5,%ymm2
vmovdqu -32(%rdi),%ymm5
vpminsd -32(%rcx),%ymm5,%ymm1
vpmaxsd -32(%rcx),%ymm5,%ymm0
vpminsd %ymm2,%ymm0,%ymm3
vpmaxsd %ymm2,%ymm0,%ymm0
vpminsd %ymm4,%ymm1,%ymm2
vpmaxsd %ymm4,%ymm1,%ymm1
vmovdqu %ymm0,(%r12,%rax,4)
add $8,%rax
vpminsd %ymm1,%ymm3,%ymm4
vpmaxsd %ymm1,%ymm3,%ymm1
vmovdqu %ymm4,-32(%rdi)
vmovdqu %ymm1,-32(%rsi)
vmovdqu %ymm2,-32(%rcx)
vmovdqu -32(%r8),%ymm5
vmovdqu -32(%r10),%ymm6
vpminsd -32(%rdx),%ymm5,%ymm1
vpminsd -32(%r9),%ymm6,%ymm3
vpmaxsd -32(%r9),%ymm6,%ymm2
vpmaxsd -32(%rdx),%ymm5,%ymm0
vpminsd %ymm3,%ymm1,%ymm4
vpmaxsd %ymm3,%ymm1,%ymm1
vpminsd %ymm2,%ymm0,%ymm3
vpmaxsd %ymm2,%ymm0,%ymm0
vmovdqu %ymm4,-32(%rdx)
vpminsd %ymm1,%ymm3,%ymm2
vpmaxsd %ymm1,%ymm3,%ymm1
vmovdqu %ymm1,-32(%r9)
vmovdqu %ymm2,-32(%r8)
vmovdqu %ymm0,-32(%r10)
jmp .L43
.L135: imul $-24,-80(%rbp),%rax
mov %rax,-128(%rbp)
cmp $127,%r15
jg .L105
.L63: lea (%r12,%r15,4),%rax
vmovdqa .LC1(%rip),%ymm10
movl $3,-272(%rbp)
mov $4,%r14d
mov %rax,-144(%rbp)
mov %r15,%rax
vmovdqa .LC3(%rip),%ymm11
sar $4,%rax
vmovdqa .LC2(%rip),%ymm12
mov %rax,-112(%rbp)
mov -136(%rbp),%rax
add -128(%rbp),%rax
mov %rax,-200(%rbp)
add %r13,%rax
mov %rax,-192(%rbp)
add %r13,%rax
mov %rax,-184(%rbp)
add %r13,%rax
mov %rax,-176(%rbp)
add %r13,%rax
mov %rax,-168(%rbp)
add %r13,%rax
mov %rax,-160(%rbp)
add %r13,%rax
mov %rax,-152(%rbp)
jmp .L46
.L105: xor %eax,%eax
vpcmpeqd %ymm0,%ymm0,%ymm0
.L45: vpxor 64(%r12,%rax,4),%ymm0,%ymm1
vpxor (%r12,%rax,4),%ymm0,%ymm2
vmovdqu %ymm1,64(%r12,%rax,4)
vmovdqu %ymm2,(%r12,%rax,4)
add $32,%rax
cmp %rax,%r15
jg .L45
mov -136(%rbp),%r14
add -128(%rbp),%r14
mov $8,%ebx
vpcmpeqd %ymm10,%ymm10,%ymm10
lea (%r14,%r13),%rax
mov %rax,-296(%rbp)
add %r13,%rax
lea (%rax,%r13),%r11
mov %rax,-176(%rbp)
lea (%r11,%r13),%rax
mov %rax,-288(%rbp)
add %r13,%rax
mov %rax,-144(%rbp)
add %r13,%rax
mov %rax,-112(%rbp)
add -128(%rbp),%rax
mov %rax,-200(%rbp)
add %r13,%rax
mov %rax,-192(%rbp)
add %r13,%rax
mov %rax,-184(%rbp)
add %r13,%rax
mov %rax,-168(%rbp)
add %r13,%rax
mov %rax,-160(%rbp)
add %r13,%rax
mov %rax,-152(%rbp)
add %r13,%rax
mov %rax,-280(%rbp)
.L64: mov %rbx,%rcx
sarq %rcx
.L47: cmp $127,%rcx
jle .L136
mov %rcx,%rdx
mov %r15,%rsi
mov %r12,%rdi
mov %r11,-272(%rbp)
sar $2,%rdx
mov %rcx,-240(%rbp)
call int32_threestages
mov -240(%rbp),%rcx
mov -272(%rbp),%r11
vpcmpeqd %ymm10,%ymm10,%ymm10
sar $3,%rcx
jmp .L47
.L136: cmp $64,%rcx
jne .L49
mov %r15,%rsi
mov %r12,%rdi
mov %r11,-240(%rbp)
call int32_twostages_32
mov -240(%rbp),%r11
vpcmpeqd %ymm10,%ymm10,%ymm10
.L54: xor %eax,%eax
jmp .L50
.L49: cmp $32,%rcx
jne .L51
mov %r12,%rax
xor %edx,%edx
.L52: vmovdqu (%rax),%ymm7
vmovdqu 32(%rax),%ymm5
add $64,%rdx
add $256,%rax
vpminsd -128(%rax),%ymm7,%ymm8
vpmaxsd -128(%rax),%ymm7,%ymm4
vpminsd -96(%rax),%ymm5,%ymm1
vpmaxsd -96(%rax),%ymm5,%ymm0
vmovdqu -192(%rax),%ymm6
vmovdqu -160(%rax),%ymm7
vpminsd -64(%rax),%ymm6,%ymm5
vpmaxsd -32(%rax),%ymm7,%ymm2
vpmaxsd -64(%rax),%ymm6,%ymm3
vmovdqu -160(%rax),%ymm6
vpminsd -32(%rax),%ymm6,%ymm6
vpminsd %ymm5,%ymm8,%ymm7
vpmaxsd %ymm5,%ymm8,%ymm5
vpminsd %ymm6,%ymm1,%ymm8
vpmaxsd %ymm6,%ymm1,%ymm1
vpminsd %ymm3,%ymm4,%ymm6
vpmaxsd %ymm3,%ymm4,%ymm3
vpminsd %ymm2,%ymm0,%ymm4
vpmaxsd %ymm2,%ymm0,%ymm0
vpminsd %ymm8,%ymm7,%ymm9
vpmaxsd %ymm8,%ymm7,%ymm2
vpminsd %ymm1,%ymm5,%ymm7
vpmaxsd %ymm1,%ymm5,%ymm1
vmovdqu %ymm9,-256(%rax)
vpminsd %ymm4,%ymm6,%ymm5
vpmaxsd %ymm4,%ymm6,%ymm6
vmovdqu %ymm2,-224(%rax)
vpminsd %ymm0,%ymm3,%ymm4
vpmaxsd %ymm0,%ymm3,%ymm3
vmovdqu %ymm5,-128(%rax)
vmovdqu %ymm7,-192(%rax)
vmovdqu %ymm1,-160(%rax)
vmovdqu %ymm6,-96(%rax)
vmovdqu %ymm4,-64(%rax)
vmovdqu %ymm3,-32(%rax)
cmp %rdx,%r15
jg .L52
.L56: lea (%rbx,%rbx),%rdx
xor %ecx,%ecx
cmp -80(%rbp),%rdx
setne %al
sete %cl
mov %rdx,%r8
xor %esi,%esi
movzbl %al,%eax
mov %eax,-204(%rbp)
jmp .L53
.L51: cmp $16,%rcx
jne .L131
jmp .L54
.L50: vmovdqu (%r12,%rax,4),%ymm5
vmovdqu 32(%r12,%rax,4),%ymm6
vpminsd 64(%r12,%rax,4),%ymm5,%ymm2
vpminsd 96(%r12,%rax,4),%ymm6,%ymm3
vpmaxsd 64(%r12,%rax,4),%ymm5,%ymm0
vpmaxsd 96(%r12,%rax,4),%ymm6,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,(%r12,%rax,4)
vmovdqu %ymm2,32(%r12,%rax,4)
vmovdqu %ymm3,64(%r12,%rax,4)
vmovdqu %ymm0,96(%r12,%rax,4)
add $32,%rax
cmp %rax,%r15
jg .L50
jmp .L56
.L131: cmp $8,%rcx
jne .L56
xor %eax,%eax
.L57: vmovdqu 32(%r12,%rax,4),%ymm7
vpmaxsd (%r12,%rax,4),%ymm7,%ymm0
vpminsd (%r12,%rax,4),%ymm7,%ymm1
vmovdqu %ymm0,32(%r12,%rax,4)
vmovdqu %ymm1,(%r12,%rax,4)
add $16,%rax
cmp %rax,%r15
jg .L57
jmp .L56
.L59: mov -176(%rbp),%r10
vmovdqu (%r12,%rax,4),%ymm5
vpminsd (%r14,%rax,4),%ymm5,%ymm6
vpmaxsd (%r14,%rax,4),%ymm5,%ymm15
vmovdqu (%r10,%rax,4),%ymm5
mov -296(%rbp),%r10
vmovdqu (%r10,%rax,4),%ymm7
mov -288(%rbp),%r10
vmovdqa %ymm5,-240(%rbp)
vmovdqa %ymm7,-272(%rbp)
vmovdqu (%r10,%rax,4),%ymm7
mov -112(%rbp),%r10
vmovdqa -272(%rbp),%ymm5
vpminsd -240(%rbp),%ymm5,%ymm1
vpmaxsd -240(%rbp),%ymm5,%ymm5
vmovdqa %ymm7,-240(%rbp)
vmovdqa -240(%rbp),%ymm4
vpmaxsd (%r11,%rax,4),%ymm4,%ymm0
vmovdqu (%r10,%rax,4),%ymm4
vpminsd %ymm1,%ymm6,%ymm8
mov -144(%rbp),%r10
vmovdqa -240(%rbp),%ymm7
vpmaxsd %ymm1,%ymm6,%ymm6
vpminsd %ymm5,%ymm15,%ymm1
vmovdqa %ymm4,-240(%rbp)
vpminsd (%r11,%rax,4),%ymm7,%ymm7
vpmaxsd %ymm5,%ymm15,%ymm15
vmovdqu (%r10,%rax,4),%ymm4
vmovdqa %ymm4,-272(%rbp)
vmovdqa -272(%rbp),%ymm4
vpminsd -240(%rbp),%ymm4,%ymm3
vpmaxsd -240(%rbp),%ymm4,%ymm4
vpminsd %ymm3,%ymm7,%ymm2
vpmaxsd %ymm3,%ymm7,%ymm3
vpminsd %ymm4,%ymm0,%ymm7
vpmaxsd %ymm4,%ymm0,%ymm0
vpminsd %ymm2,%ymm8,%ymm14
vpminsd %ymm7,%ymm1,%ymm13
vpminsd %ymm3,%ymm6,%ymm12
vpminsd %ymm0,%ymm15,%ymm11
vmovdqa %ymm14,%ymm9
vpmaxsd %ymm3,%ymm6,%ymm6
vpmaxsd %ymm2,%ymm8,%ymm2
vmovdqa %ymm13,%ymm8
vpmaxsd %ymm7,%ymm1,%ymm1
vpmaxsd %ymm0,%ymm15,%ymm0
vmovdqa %ymm6,-240(%rbp)
vmovdqa %ymm2,%ymm5
vmovdqa -240(%rbp),%ymm3
vmovdqa %ymm1,%ymm4
vmovdqa %ymm12,%ymm7
vmovdqa %ymm11,%ymm6
vmovdqa %ymm0,%ymm15
test %ecx,%ecx
je .L58
vpxor %ymm14,%ymm10,%ymm9
vpxor %ymm13,%ymm10,%ymm8
vpxor %ymm12,%ymm10,%ymm7
vpxor %ymm11,%ymm10,%ymm6
vpxor %ymm2,%ymm10,%ymm5
vpxor %ymm1,%ymm10,%ymm4
vpxor %ymm3,%ymm10,%ymm3
vpxor %ymm0,%ymm10,%ymm15
.L58: mov -200(%rbp),%r10
vmovdqu %ymm9,(%r12,%rax,4)
vmovdqu %ymm8,(%r10,%rax,4)
mov -192(%rbp),%r10
vmovdqu %ymm7,(%r10,%rax,4)
mov -184(%rbp),%r10
vmovdqu %ymm6,(%r10,%rax,4)
mov -168(%rbp),%r10
vmovdqu %ymm5,(%r10,%rax,4)
mov -160(%rbp),%r10
vmovdqu %ymm4,(%r10,%rax,4)
mov -152(%rbp),%r10
vmovdqu %ymm3,(%r10,%rax,4)
mov -280(%rbp),%r10
vmovdqu %ymm15,(%r10,%rax,4)
add $8,%rax
.L60: cmp %rax,%rdi
jg .L59
xor $1,%ecx
lea (%rdx,%r9),%rdi
.L62: mov %rdi,%r9
sub %rbx,%r9
mov %r9,%rax
cmp %r9,%r8
jg .L60
xor -204(%rbp),%ecx
add %rdx,%rsi
add %rdx,%r8
.L53: cmp -80(%rbp),%rsi
jge .L61
lea (%rsi,%rbx),%rdi
jmp .L62
.L61: salq $4,%rbx
cmp %r15,%rbx
je .L63
mov %rdx,%rbx
jmp .L64
.L46: cmp $4,%r14
jne .L132
mov %r12,%rax
.L65: cmp -144(%rbp),%rax
je .L72
vpxor 32(%rax),%ymm12,%ymm0
vpxor (%rax),%ymm12,%ymm1
add $64,%rax
vmovdqu %ymm1,-64(%rax)
vmovdqu %ymm0,-32(%rax)
jmp .L65
.L72: mov -112(%rbp),%rbx
jmp .L68
.L132: mov %r12,%rax
cmp $2,%r14
jne .L70
.L69: cmp -144(%rbp),%rax
je .L72
vpxor 32(%rax),%ymm10,%ymm2
vpxor (%rax),%ymm10,%ymm1
add $64,%rax
vperm2i128 $32,%ymm2,%ymm1,%ymm0
vperm2i128 $49,%ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm2,%ymm1
vperm2i128 $49,%ymm0,%ymm2,%ymm0
vmovdqu %ymm1,-64(%rax)
vmovdqu %ymm0,-32(%rax)
jmp .L69
.L70: cmp -144(%rbp),%rax
je .L72
vpxor 32(%rax),%ymm11,%ymm2
vpxor (%rax),%ymm11,%ymm1
add $64,%rax
vperm2i128 $32,%ymm2,%ymm1,%ymm0
vperm2i128 $49,%ymm2,%ymm1,%ymm1
vpunpcklqdq %ymm1,%ymm0,%ymm2
vpunpckhqdq %ymm1,%ymm0,%ymm0
vpminsd %ymm0,%ymm2,%ymm1
vpmaxsd %ymm0,%ymm2,%ymm2
vpunpcklqdq %ymm2,%ymm1,%ymm0
vpunpckhqdq %ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm2,%ymm1
vperm2i128 $49,%ymm0,%ymm2,%ymm0
vmovdqu %ymm1,-64(%rax)
vmovdqu %ymm0,-32(%rax)
jmp .L70
.L137: cmp $32,%rbx
jne .L75
.L74: mov %rbx,%rdx
mov %r15,%rsi
mov %r12,%rdi
sar $3,%rbx
sar $2,%rdx
call int32_threestages
vmovdqa .LC2(%rip),%ymm12
vmovdqa .LC3(%rip),%ymm11
vmovdqa .LC1(%rip),%ymm10
.L68: cmp $127,%rbx
jle .L137
jmp .L74
.L139: sar $2,%rbx
.L75: cmp $15,%rbx
jle .L138
mov %rbx,%rcx
xor %esi,%esi
sarq %rcx
imul $-8,%rcx,%rdi
lea 0(,%rcx,4),%rdx
lea (%r12,%rdx),%r11
lea (%r11,%rdx),%r10
lea (%r10,%rdx),%r8
lea (%rdi,%r8),%rax
lea (%rax,%rdx),%r9
mov %rax,-136(%rbp)
lea (%r9,%rdx),%rax
mov %rax,-240(%rbp)
.L76: cmp %r15,%rsi
jge .L139
mov %rsi,%rax
.L78: cmp %rcx,%rax
jge .L140
vmovdqu (%r12,%rax,4),%ymm6
vmovdqu (%r11,%rax,4),%ymm5
vpminsd (%r10,%rax,4),%ymm6,%ymm2
vpminsd (%r8,%rax,4),%ymm5,%ymm3
mov -136(%rbp),%rdi
vpmaxsd (%r10,%rax,4),%ymm6,%ymm0
vpmaxsd (%r8,%rax,4),%ymm5,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vmovdqu %ymm4,(%r12,%rax,4)
vmovdqu %ymm2,(%rdi,%rax,4)
mov -240(%rbp),%rdi
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm3,(%r9,%rax,4)
vmovdqu %ymm0,(%rdi,%rax,4)
add $8,%rax
jmp .L78
.L140: add %rdx,%rsi
add %rdx,%rcx
jmp .L76
.L138: cmp $8,%rbx
je .L109
.L83: mov -152(%rbp),%rdx
mov -160(%rbp),%rcx
xor %eax,%eax
mov -168(%rbp),%rsi
mov -176(%rbp),%rdi
mov -184(%rbp),%r8
mov -192(%rbp),%r9
mov -200(%rbp),%r10
jmp .L81
.L109: xor %eax,%eax
.L80: cmp %r15,%rax
jge .L83
vmovdqu (%r12,%rax,4),%ymm5
vpminsd 32(%r12,%rax,4),%ymm5,%ymm1
vpmaxsd 32(%r12,%rax,4),%ymm5,%ymm0
vmovdqu %ymm1,(%r12,%rax,4)
vmovdqu %ymm0,32(%r12,%rax,4)
add $16,%rax
jmp .L80
.L81: cmp -80(%rbp),%rax
jge .L141
vmovdqu (%rdi),%ymm7
add $32,%r10
add $32,%r9
add $32,%r8
add $32,%rdi
add $32,%rsi
add $32,%rcx
add $32,%rdx
vmovdqu (%r12,%rax,4),%ymm5
vmovdqu -32(%r9),%ymm6
vpminsd -32(%r10),%ymm5,%ymm3
vpmaxsd -32(%r10),%ymm5,%ymm1
vpminsd -32(%r8),%ymm6,%ymm2
vpmaxsd -32(%r8),%ymm6,%ymm0
vpminsd -32(%rsi),%ymm7,%ymm7
vmovdqu -32(%rcx),%ymm5
vmovdqu -32(%rdi),%ymm6
vpmaxsd -32(%rdx),%ymm5,%ymm4
vpminsd %ymm2,%ymm3,%ymm9
vpmaxsd -32(%rsi),%ymm6,%ymm8
vpminsd -32(%rdx),%ymm5,%ymm6
vpminsd %ymm0,%ymm1,%ymm13
vpmaxsd %ymm2,%ymm3,%ymm2
vpminsd %ymm6,%ymm7,%ymm5
vpminsd %ymm4,%ymm8,%ymm3
vpmaxsd %ymm6,%ymm7,%ymm6
vpmaxsd %ymm0,%ymm1,%ymm0
vpmaxsd %ymm4,%ymm8,%ymm4
vpminsd %ymm5,%ymm9,%ymm1
vpminsd %ymm6,%ymm2,%ymm8
vpminsd %ymm3,%ymm13,%ymm7
vmovdqu %ymm1,(%r12,%rax,4)
add $8,%rax
vpmaxsd %ymm6,%ymm2,%ymm2
vpmaxsd %ymm5,%ymm9,%ymm5
vmovdqu %ymm7,-32(%r10)
vpminsd %ymm4,%ymm0,%ymm6
vpmaxsd %ymm3,%ymm13,%ymm3
vmovdqu %ymm8,-32(%r9)
vpmaxsd %ymm4,%ymm0,%ymm0
vmovdqu %ymm6,-32(%r8)
vmovdqu %ymm5,-32(%rdi)
vmovdqu %ymm3,-32(%rsi)
vmovdqu %ymm2,-32(%rcx)
vmovdqu %ymm0,-32(%rdx)
jmp .L81
.L141: sarq %r14
decl -272(%rbp)
jne .L46
mov %r12,%rax
xor %edx,%edx
vpcmpeqd %ymm5,%ymm5,%ymm5
.L85: cmp %r15,%rdx
jge .L89
vmovdqu (%rax),%ymm7
vpunpckldq 32(%rax),%ymm7,%ymm12
vpunpckhdq 32(%rax),%ymm7,%ymm6
vmovdqu 64(%rax),%ymm7
vpunpckldq 96(%rax),%ymm7,%ymm2
vpunpckhdq 96(%rax),%ymm7,%ymm4
vmovdqu 128(%rax),%ymm7
vpunpckldq 160(%rax),%ymm7,%ymm1
vpunpckhdq 160(%rax),%ymm7,%ymm0
vpunpcklqdq %ymm2,%ymm12,%ymm8
vpunpcklqdq %ymm4,%ymm6,%ymm9
cmp $0,-116(%rbp)
vmovdqu 192(%rax),%ymm7
vpunpckhqdq %ymm2,%ymm12,%ymm12
vpunpckhqdq %ymm4,%ymm6,%ymm4
vpunpckldq 224(%rax),%ymm7,%ymm10
vpunpckhdq 224(%rax),%ymm7,%ymm3
vpunpcklqdq %ymm10,%ymm1,%ymm11
vpunpckhqdq %ymm10,%ymm1,%ymm1
vpunpcklqdq %ymm3,%ymm0,%ymm7
vpunpckhqdq %ymm3,%ymm0,%ymm0
je .L86
vpxor %ymm5,%ymm12,%ymm12
vpxor %ymm5,%ymm4,%ymm4
vpxor %ymm5,%ymm1,%ymm1
vpxor %ymm5,%ymm0,%ymm0
jmp .L87
.L86: vpxor %ymm5,%ymm8,%ymm8
vpxor %ymm5,%ymm9,%ymm9
vpxor %ymm5,%ymm11,%ymm11
vpxor %ymm5,%ymm7,%ymm7
.L87: vperm2i128 $32,%ymm11,%ymm8,%ymm3
vperm2i128 $32,%ymm1,%ymm12,%ymm6
vperm2i128 $32,%ymm7,%ymm9,%ymm10
add $64,%rdx
vperm2i128 $32,%ymm0,%ymm4,%ymm13
vperm2i128 $49,%ymm11,%ymm8,%ymm11
vperm2i128 $49,%ymm7,%ymm9,%ymm9
add $256,%rax
vperm2i128 $49,%ymm1,%ymm12,%ymm1
vperm2i128 $49,%ymm0,%ymm4,%ymm0
vpmaxsd %ymm6,%ymm3,%ymm2
vpminsd %ymm6,%ymm3,%ymm4
vpminsd %ymm1,%ymm11,%ymm7
vpmaxsd %ymm13,%ymm10,%ymm3
vpminsd %ymm13,%ymm10,%ymm8
vpmaxsd %ymm1,%ymm11,%ymm1
vpminsd %ymm0,%ymm9,%ymm10
vpmaxsd %ymm0,%ymm9,%ymm0
vpminsd %ymm8,%ymm4,%ymm11
vpminsd %ymm3,%ymm2,%ymm9
vpmaxsd %ymm8,%ymm4,%ymm8
vpminsd %ymm10,%ymm7,%ymm6
vpmaxsd %ymm10,%ymm7,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm0,%ymm1,%ymm3
vpmaxsd %ymm0,%ymm1,%ymm1
vpminsd %ymm6,%ymm11,%ymm10
vpmaxsd %ymm6,%ymm11,%ymm0
vpminsd %ymm3,%ymm9,%ymm7
vpmaxsd %ymm3,%ymm9,%ymm6
vpminsd %ymm4,%ymm8,%ymm3
vpminsd %ymm1,%ymm2,%ymm9
vpmaxsd %ymm4,%ymm8,%ymm4
vpunpckldq %ymm7,%ymm10,%ymm8
vpmaxsd %ymm1,%ymm2,%ymm2
vpunpckhdq %ymm7,%ymm10,%ymm7
vpunpckldq %ymm9,%ymm3,%ymm1
vpunpckhdq %ymm9,%ymm3,%ymm3
vpunpckldq %ymm6,%ymm0,%ymm9
vpunpckhdq %ymm6,%ymm0,%ymm6
vpunpckldq %ymm2,%ymm4,%ymm0
vpunpckhdq %ymm2,%ymm4,%ymm2
vpunpcklqdq %ymm3,%ymm7,%ymm10
vpunpcklqdq %ymm1,%ymm8,%ymm4
vpunpcklqdq %ymm0,%ymm9,%ymm13
vpunpckhqdq %ymm1,%ymm8,%ymm8
vpunpckhqdq %ymm3,%ymm7,%ymm3
vpunpckhqdq %ymm0,%ymm9,%ymm1
vpunpcklqdq %ymm2,%ymm6,%ymm7
vpunpckhqdq %ymm2,%ymm6,%ymm0
vperm2i128 $32,%ymm13,%ymm4,%ymm12
vperm2i128 $32,%ymm1,%ymm8,%ymm11
vperm2i128 $32,%ymm0,%ymm3,%ymm6
vperm2i128 $32,%ymm7,%ymm10,%ymm9
vperm2i128 $49,%ymm13,%ymm4,%ymm4
vmovdqu %ymm12,-256(%rax)
vperm2i128 $49,%ymm1,%ymm8,%ymm1
vperm2i128 $49,%ymm7,%ymm10,%ymm2
vperm2i128 $49,%ymm0,%ymm3,%ymm0
vmovdqu %ymm11,-224(%rax)
vmovdqu %ymm9,-192(%rax)
vmovdqu %ymm6,-160(%rax)
vmovdqu %ymm4,-128(%rax)
vmovdqu %ymm1,-96(%rax)
vmovdqu %ymm2,-64(%rax)
vmovdqu %ymm0,-32(%rax)
jmp .L85
.L142: cmp $32,-112(%rbp)
jne .L94
.L93: mov -112(%rbp),%rcx
sar $2,%rcx
lea 0(,%rcx,4),%rdx
lea 0(,%rcx,8),%rax
mov %rcx,-136(%rbp)
lea (%r12,%rdx),%r9
mov %rax,-184(%rbp)
imul $-24,%rcx,%rax
lea (%r9,%rdx),%r14
lea (%r14,%rdx),%rsi
lea (%rsi,%rdx),%rbx
lea (%rbx,%rdx),%r10
lea (%r10,%rdx),%r8
lea (%r8,%rdx),%rdi
add %rdi,%rax
mov %rax,-176(%rbp)
add %rdx,%rax
mov %rax,-168(%rbp)
add %rdx,%rax
lea (%rax,%rdx),%r11
mov %rax,-160(%rbp)
lea (%r11,%rdx),%rax
mov %rax,-200(%rbp)
add %rdx,%rax
add %rax,%rdx
mov %rax,-144(%rbp)
mov %rdx,-192(%rbp)
.L90: mov -136(%rbp),%rax
sub %rcx,%rax
cmp %rax,%r15
jg .L92
sarq $3,-112(%rbp)
.L89: cmp $127,-112(%rbp)
jle .L142
jmp .L93
.L92: cmp -136(%rbp),%rax
jge .L143
vmovdqu (%r12,%rax,4),%ymm6
vpminsd (%rbx,%rax,4),%ymm6,%ymm7
vpmaxsd (%rbx,%rax,4),%ymm6,%ymm4
vmovdqu (%r9,%rax,4),%ymm6
vpminsd (%r10,%rax,4),%ymm6,%ymm1
vpmaxsd (%r10,%rax,4),%ymm6,%ymm0
vmovdqu (%r14,%rax,4),%ymm6
vpminsd (%r8,%rax,4),%ymm6,%ymm5
vpmaxsd (%r8,%rax,4),%ymm6,%ymm3
vmovdqu (%rsi,%rax,4),%ymm6
vpminsd (%rdi,%rax,4),%ymm6,%ymm6
vpminsd %ymm5,%ymm7,%ymm9
vmovdqu (%rsi,%rax,4),%ymm2
vpmaxsd %ymm5,%ymm7,%ymm5
mov -176(%rbp),%rdx
vpminsd %ymm3,%ymm4,%ymm8
vpminsd %ymm6,%ymm1,%ymm7
vpmaxsd %ymm3,%ymm4,%ymm3
vpminsd %ymm7,%ymm9,%ymm10
vpmaxsd %ymm7,%ymm9,%ymm4
vpmaxsd (%rdi,%rax,4),%ymm2,%ymm2
vpmaxsd %ymm6,%ymm1,%ymm1
vmovdqu %ymm10,(%r12,%rax,4)
vmovdqu %ymm4,(%rdx,%rax,4)
mov -168(%rbp),%rdx
vpminsd %ymm1,%ymm5,%ymm9
vpmaxsd %ymm1,%ymm5,%ymm1
vpminsd %ymm2,%ymm0,%ymm6
vpmaxsd %ymm2,%ymm0,%ymm0
vmovdqu %ymm9,(%rdx,%rax,4)
vpminsd %ymm6,%ymm8,%ymm7
vpmaxsd %ymm6,%ymm8,%ymm2
mov -160(%rbp),%rdx
vpminsd %ymm0,%ymm3,%ymm5
vpmaxsd %ymm0,%ymm3,%ymm3
vmovdqu %ymm1,(%rdx,%rax,4)
mov -200(%rbp),%rdx
vmovdqu %ymm7,(%r11,%rax,4)
vmovdqu %ymm2,(%rdx,%rax,4)
mov -144(%rbp),%rdx
vmovdqu %ymm5,(%rdx,%rax,4)
mov -192(%rbp),%rdx
vmovdqu %ymm3,(%rdx,%rax,4)
add $8,%rax
jmp .L92
.L143: mov -184(%rbp),%rdx
add %rdx,-136(%rbp)
jmp .L90
.L145: sarq $2,-112(%rbp)
.L94: cmp $15,-112(%rbp)
jle .L144
mov -112(%rbp),%rcx
xor %esi,%esi
sarq %rcx
imul $-8,%rcx,%rdi
lea 0(,%rcx,4),%rdx
lea (%r12,%rdx),%r11
lea (%r11,%rdx),%r10
lea (%r10,%rdx),%r8
add %r8,%rdi
lea (%rdi,%rdx),%r9
lea (%r9,%rdx),%rbx
.L95: cmp %r15,%rsi
jge .L145
mov %rsi,%rax
.L97: cmp %rcx,%rax
jge .L146
vmovdqu (%r12,%rax,4),%ymm5
vpminsd (%r10,%rax,4),%ymm5,%ymm2
vpmaxsd (%r10,%rax,4),%ymm5,%ymm0
vmovdqu (%r11,%rax,4),%ymm5
vpminsd (%r8,%rax,4),%ymm5,%ymm3
vpmaxsd (%r8,%rax,4),%ymm5,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,(%r12,%rax,4)
vmovdqu %ymm2,(%rdi,%rax,4)
vmovdqu %ymm3,(%r9,%rax,4)
vmovdqu %ymm0,(%rbx,%rax,4)
add $8,%rax
jmp .L97
.L146: add %rdx,%rsi
add %rdx,%rcx
jmp .L95
.L144: cmp $8,-112(%rbp)
je .L111
.L102: mov -152(%rbp),%rdx
add -128(%rbp),%rdx
xor %ecx,%ecx
vpcmpeqd %ymm6,%ymm6,%ymm6
lea (%rdx,%r13),%r10
lea (%r10,%r13),%r9
lea (%r9,%r13),%r8
lea (%r8,%r13),%rdi
lea (%rdi,%r13),%rsi
lea (%rsi,%r13),%rax
jmp .L100
.L111: xor %eax,%eax
.L99: cmp %r15,%rax
jge .L102
vmovdqu (%r12,%rax,4),%ymm5
vpminsd 32(%r12,%rax,4),%ymm5,%ymm1
vpmaxsd 32(%r12,%rax,4),%ymm5,%ymm0
vmovdqu %ymm1,(%r12,%rax,4)
vmovdqu %ymm0,32(%r12,%rax,4)
add $16,%rax
jmp .L99
.L104: vmovdqu (%r10),%ymm7
vmovdqu (%r12,%rcx,4),%ymm4
vpminsd (%r9),%ymm7,%ymm3
vpminsd (%rdx),%ymm4,%ymm5
vpmaxsd (%r9),%ymm7,%ymm2
vpmaxsd (%rdx),%ymm4,%ymm4
vmovdqu (%r8),%ymm7
vmovdqu (%rsi),%ymm14
vpminsd %ymm3,%ymm5,%ymm11
vpmaxsd %ymm3,%ymm5,%ymm3
vpminsd (%rdi),%ymm7,%ymm1
vpminsd %ymm2,%ymm4,%ymm10
cmp $0,-116(%rbp)
vpmaxsd (%rdi),%ymm7,%ymm0
vmovdqu (%rsi),%ymm7
vpmaxsd %ymm2,%ymm4,%ymm2
vpminsd (%rax),%ymm7,%ymm7
vpmaxsd (%rax),%ymm14,%ymm9
vpminsd %ymm7,%ymm1,%ymm8
vpmaxsd %ymm7,%ymm1,%ymm1
vpminsd %ymm9,%ymm0,%ymm7
vpmaxsd %ymm9,%ymm0,%ymm0
vpminsd %ymm8,%ymm11,%ymm5
vpminsd %ymm1,%ymm3,%ymm9
vpminsd %ymm7,%ymm10,%ymm12
vpmaxsd %ymm1,%ymm3,%ymm3
vpminsd %ymm0,%ymm2,%ymm4
vpmaxsd %ymm8,%ymm11,%ymm8
vpmaxsd %ymm0,%ymm2,%ymm2
vpmaxsd %ymm7,%ymm10,%ymm7
vpunpckldq %ymm8,%ymm5,%ymm11
vpunpckldq %ymm7,%ymm12,%ymm10
vpunpckhdq %ymm8,%ymm5,%ymm8
vpunpckhdq %ymm7,%ymm12,%ymm7
vpunpckhdq %ymm3,%ymm9,%ymm5
vpunpckldq %ymm2,%ymm4,%ymm1
vpunpckldq %ymm3,%ymm9,%ymm0
vpunpckhdq %ymm2,%ymm4,%ymm4
vpunpcklqdq %ymm0,%ymm11,%ymm3
vpunpckhqdq %ymm0,%ymm11,%ymm9
vpunpcklqdq %ymm5,%ymm8,%ymm2
vpunpcklqdq %ymm4,%ymm7,%ymm11
vpunpckhqdq %ymm5,%ymm8,%ymm5
vpunpcklqdq %ymm1,%ymm10,%ymm12
vpunpckhqdq %ymm4,%ymm7,%ymm0
vpunpckhqdq %ymm1,%ymm10,%ymm1
vperm2i128 $32,%ymm11,%ymm2,%ymm8
vperm2i128 $32,%ymm12,%ymm3,%ymm10
vperm2i128 $32,%ymm1,%ymm9,%ymm7
vperm2i128 $32,%ymm0,%ymm5,%ymm4
vperm2i128 $49,%ymm12,%ymm3,%ymm3
vperm2i128 $49,%ymm11,%ymm2,%ymm2
vperm2i128 $49,%ymm1,%ymm9,%ymm1
vperm2i128 $49,%ymm0,%ymm5,%ymm0
je .L103
vpxor %ymm6,%ymm10,%ymm10
vpxor %ymm6,%ymm8,%ymm8
vpxor %ymm6,%ymm7,%ymm7
vpxor %ymm6,%ymm4,%ymm4
vpxor %ymm6,%ymm3,%ymm3
vpxor %ymm6,%ymm2,%ymm2
vpxor %ymm6,%ymm1,%ymm1
vpxor %ymm6,%ymm0,%ymm0
.L103: add $32,%rdx
add $32,%r10
add $32,%r9
add $32,%r8
vmovdqu %ymm10,(%r12,%rcx,4)
add $32,%rdi
add $8,%rcx
add $32,%rsi
vmovdqu %ymm3,-32(%rdx)
add $32,%rax
vmovdqu %ymm8,-32(%r10)
vmovdqu %ymm2,-32(%r9)
vmovdqu %ymm7,-32(%r8)
vmovdqu %ymm1,-32(%rdi)
vmovdqu %ymm4,-32(%rsi)
vmovdqu %ymm0,-32(%rax)
.L100: cmp -80(%rbp),%rcx
jl .L104
.L35: add $264,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
lea -16(%r13),%rsp
pop %r13
ret
.endfn int32_sort_2power
.rodata.cst32
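/ Direction masks for the bitonic networks: all-ones lanes mark the
/ elements that get complemented, i.e. the compare-exchanges that run
/ downward at a given stage.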
.LC0: .quad -1,0,-1,0
.LC1: .quad 0,-1,-1,0
.LC2: .quad -1,-1,0,0
.LC3: .quad -4294967296,4294967295,-4294967296,4294967295
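/ Eight copies of INT32_MAX, used to pad short arrays out to a power
/ of two.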
.LC4: .quad 0x7fffffff7fffffff
.quad 0x7fffffff7fffffff
.quad 0x7fffffff7fffffff
.quad 0x7fffffff7fffffff