#include "libc/macros.h" .source __FILE__ / D.J. Bernstein's outrageously fast integer sorting algorithm. / / @param rdi is int32 array / @param rsi is number of elements in rdi / @note public domain / @see en.wikipedia.org/wiki/Sorting_network djbsort$avx2: push %rbp mov %rsp,%rbp push %r15 push %r14 push %r13 mov %rsi,%r13 push %r12 mov %rdi,%r12 push %rbx andq $-32,%rsp sub $1056,%rsp cmp $8,%rsi jg .L148 jne .L149 mov (%rdi),%eax mov 4(%rdi),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,(%rdi) mov 8(%rdi),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,4(%rdi) mov 12(%rdi),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,8(%rdi) mov 16(%rdi),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,12(%rdi) mov 20(%rdi),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,16(%rdi) mov 24(%rdi),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,20(%rdi) mov 28(%rdi),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,24(%rdi) mov %edx,28(%rdi) jmp .L150 .L149: cmp $7,%rsi jne .L151 .L150: mov (%r12),%edx mov 4(%r12),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,(%r12) mov 8(%r12),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,4(%r12) mov 12(%r12),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,8(%r12) mov 16(%r12),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,12(%r12) mov 20(%r12),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,16(%r12) mov 24(%r12),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,20(%r12) mov %edx,24(%r12) jmp .L152 .L151: cmp $6,%rsi jne .L153 .L152: mov (%r12),%eax mov 4(%r12),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,(%r12) mov 8(%r12),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,4(%r12) mov 12(%r12),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,8(%r12) mov 16(%r12),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,12(%r12) mov 20(%r12),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,16(%r12) mov %edx,20(%r12) jmp .L154 .L153: cmp $5,%rsi jne .L155 .L154: mov (%r12),%edx mov 4(%r12),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,(%r12) mov 8(%r12),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,4(%r12) mov 12(%r12),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,8(%r12) mov 16(%r12),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,12(%r12) mov %edx,16(%r12) jmp .L156 .L155: cmp $4,%rsi jne .L157 .L156: mov (%r12),%eax mov 4(%r12),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,(%r12) mov 8(%r12),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,4(%r12) mov 12(%r12),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,8(%r12) mov %edx,12(%r12) jmp .L158 .L157: cmp $3,%rsi jne .L159 .L158: mov (%r12),%edx mov 4(%r12),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,(%r12) mov 8(%r12),%edx cmp %edx,%eax mov %eax,%ecx cmovg %edx,%eax cmovg %ecx,%edx mov %eax,4(%r12) mov %edx,8(%r12) jmp .L160 .L159: cmp $2,%rsi jne .L147 .L160: mov (%r12),%edx mov 4(%r12),%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov 
%edx,(%r12) mov %eax,4(%r12) jmp .L147 .L148: lea -1(%rsi),%rax mov $8,%ebx test %rsi,%rax jne .L162 xor %edx,%edx call int32_sort_2power jmp .L147 .L162: mov %r13,%r14 sub %rbx,%r14 cmp %rbx,%r14 jle .L199 add %rbx,%rbx jmp .L162 .L199: cmp $128,%rbx jg .L164 mov %rbx,%rax mov %rbx,%rdx vmovdqa .LC4(%rip),%ymm0 sar $3,%rax sar $2,%rdx .L165: cmp %rdx,%rax jge .L200 mov %rax,%rcx incq %rax salq $5,%rcx vmovdqa %ymm0,32(%rsp,%rcx) jmp .L165 .L200: xor %eax,%eax .L167: mov (%r12,%rax,4),%edx mov %rax,%r14 mov %edx,32(%rsp,%rax,4) lea 1(%rax),%rax cmp %rax,%r13 jne .L167 lea (%rbx,%rbx),%rsi xor %edx,%edx lea 32(%rsp),%rdi call int32_sort_2power xor %eax,%eax .L168: mov 32(%rsp,%rax,4),%ecx mov %rax,%rdx mov %ecx,(%r12,%rax,4) incq %rax cmp %rdx,%r14 jne .L168 jmp .L147 .L164: mov %rbx,%rsi mov %r12,%rdi mov $1,%edx call int32_sort_2power lea (%r12,%rbx,4),%rdi mov %r14,%rsi call djbsort$avx2 .L175: mov %rbx,%r14 mov %r13,%rsi mov %r12,%rdi sar $2,%r14 mov %r14,%rdx call int32_threestages lea 0(,%r14,4),%r10 mov %r13,%rdx lea (%r10,%rax),%r11 sub %r10,%rdx lea (%r12,%rax,4),%rdi mov %rax,%r9 sub %rax,%rdx lea (%r12,%r11,4),%rsi call minmax_vector lea (%r14,%r14),%rax mov %rax,24(%rsp) cmp %r13,%r11 jg .L169 imul $-8,%r14,%rax lea (%r12,%r10),%rdx lea (%rdx,%r10),%rcx lea (%r14,%r9),%r15 lea (%rcx,%r10),%rdi add %rdi,%rax lea (%rax,%r10),%rsi lea (%rsi,%r10),%r8 .L170: cmp %r9,%r15 jle .L201 vmovdqu (%rcx,%r9,4),%ymm7 vmovdqu (%rdi,%r9,4),%ymm6 vpminsd (%r12,%r9,4),%ymm7,%ymm2 vpminsd (%rdx,%r9,4),%ymm6,%ymm3 vpmaxsd (%r12,%r9,4),%ymm7,%ymm0 vpmaxsd (%rdx,%r9,4),%ymm6,%ymm1 vpminsd %ymm3,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm4,(%r12,%r9,4) vmovdqu %ymm2,(%rax,%r9,4) vmovdqu %ymm3,(%rsi,%r9,4) vmovdqu %ymm0,(%r8,%r9,4) add $8,%r9 jmp .L170 .L201: mov %r11,%r9 .L169: mov 24(%rsp),%rax lea (%r14,%r14),%r15 mov %r13,%r11 lea (%r12,%r9,4),%rdi sub %r15,%r11 add %r9,%rax mov %r11,%rdx lea (%r12,%rax,4),%rsi sub %r9,%rdx call minmax_vector lea (%r15,%r9),%rax cmp %r13,%rax jg .L172 mov %rax,%rdx add %r12,%r10 sub %r14,%rdx .L173: cmp %r9,%rdx jle .L202 vmovdqu (%r10,%r9,4),%ymm6 vpminsd (%r12,%r9,4),%ymm6,%ymm1 vpmaxsd (%r12,%r9,4),%ymm6,%ymm0 vmovdqu %ymm1,(%r12,%r9,4) vmovdqu %ymm0,(%r10,%r9,4) add $8,%r9 jmp .L173 .L202: mov %rax,%r9 .L172: lea (%r11,%r14),%rdx add %r9,%r14 lea (%r12,%r9,4),%rdi sar $3,%rbx sub %r9,%rdx lea (%r12,%r14,4),%rsi call minmax_vector cmp $63,%rbx jg .L175 cmp $32,%rbx jne .L176 mov %r12,%rax mov $63,%edx .L177: cmp %r13,%rdx jge .L203 vmovdqu (%rax),%ymm6 add $64,%rdx add $256,%rax vpminsd -128(%rax),%ymm6,%ymm10 vpmaxsd -128(%rax),%ymm6,%ymm8 vmovdqu -224(%rax),%ymm6 vpminsd -96(%rax),%ymm6,%ymm3 vpmaxsd -96(%rax),%ymm6,%ymm0 vmovdqu -192(%rax),%ymm6 vpminsd -64(%rax),%ymm6,%ymm2 vpmaxsd -64(%rax),%ymm6,%ymm1 vmovdqu -160(%rax),%ymm6 vpmaxsd -32(%rax),%ymm6,%ymm4 vpminsd -32(%rax),%ymm6,%ymm13 vpminsd %ymm2,%ymm10,%ymm15 vpminsd %ymm1,%ymm8,%ymm12 vpminsd %ymm13,%ymm3,%ymm11 vpminsd %ymm4,%ymm0,%ymm5 vpmaxsd %ymm1,%ymm8,%ymm1 vpmaxsd %ymm2,%ymm10,%ymm2 vpmaxsd %ymm13,%ymm3,%ymm13 vpmaxsd %ymm4,%ymm0,%ymm0 vpminsd %ymm13,%ymm2,%ymm10 vpminsd %ymm0,%ymm1,%ymm4 vpminsd %ymm5,%ymm12,%ymm9 vpminsd %ymm11,%ymm15,%ymm14 vpmaxsd %ymm13,%ymm2,%ymm13 vpmaxsd %ymm0,%ymm1,%ymm0 vpmaxsd %ymm11,%ymm15,%ymm15 vpmaxsd %ymm5,%ymm12,%ymm12 vperm2i128 $32,%ymm13,%ymm10,%ymm6 vperm2i128 $32,%ymm12,%ymm9,%ymm5 vperm2i128 $32,%ymm0,%ymm4,%ymm8 vperm2i128 $32,%ymm15,%ymm14,%ymm11 vperm2i128 $49,%ymm0,%ymm4,%ymm0 vperm2i128 
$49,%ymm12,%ymm9,%ymm12 vperm2i128 $49,%ymm15,%ymm14,%ymm14 vperm2i128 $49,%ymm13,%ymm10,%ymm13 vpminsd %ymm14,%ymm11,%ymm3 vpminsd %ymm12,%ymm5,%ymm1 vpminsd %ymm13,%ymm6,%ymm2 vpmaxsd %ymm12,%ymm5,%ymm9 vpmaxsd %ymm14,%ymm11,%ymm11 vpminsd %ymm0,%ymm8,%ymm12 vperm2i128 $32,%ymm9,%ymm1,%ymm5 vpmaxsd %ymm0,%ymm8,%ymm8 vpmaxsd %ymm13,%ymm6,%ymm10 vperm2i128 $32,%ymm11,%ymm3,%ymm7 vperm2i128 $32,%ymm10,%ymm2,%ymm6 vperm2i128 $49,%ymm11,%ymm3,%ymm11 vperm2i128 $49,%ymm10,%ymm2,%ymm10 vperm2i128 $49,%ymm9,%ymm1,%ymm9 vperm2i128 $32,%ymm8,%ymm12,%ymm4 vperm2i128 $49,%ymm8,%ymm12,%ymm8 vpunpcklqdq %ymm11,%ymm7,%ymm3 vpunpcklqdq %ymm10,%ymm6,%ymm2 vpunpcklqdq %ymm9,%ymm5,%ymm1 vpunpcklqdq %ymm8,%ymm4,%ymm0 vpunpckhqdq %ymm11,%ymm7,%ymm7 vpunpckhqdq %ymm10,%ymm6,%ymm6 vpunpckhqdq %ymm9,%ymm5,%ymm5 vpunpckhqdq %ymm8,%ymm4,%ymm4 vpminsd %ymm3,%ymm7,%ymm11 vpminsd %ymm2,%ymm6,%ymm10 vpminsd %ymm1,%ymm5,%ymm9 vpminsd %ymm0,%ymm4,%ymm8 vpmaxsd %ymm3,%ymm7,%ymm7 vpmaxsd %ymm2,%ymm6,%ymm6 vpmaxsd %ymm1,%ymm5,%ymm5 vpunpckldq %ymm7,%ymm11,%ymm3 vpmaxsd %ymm0,%ymm4,%ymm4 vpunpckhdq %ymm7,%ymm11,%ymm7 vpunpckldq %ymm6,%ymm10,%ymm2 vpunpckldq %ymm5,%ymm9,%ymm1 vpunpckhdq %ymm6,%ymm10,%ymm6 vpunpckhdq %ymm5,%ymm9,%ymm5 vpunpckldq %ymm4,%ymm8,%ymm0 vpunpckhdq %ymm4,%ymm8,%ymm4 vpunpcklqdq %ymm7,%ymm3,%ymm10 vpunpcklqdq %ymm5,%ymm1,%ymm8 vpunpckhqdq %ymm7,%ymm3,%ymm3 vpunpcklqdq %ymm6,%ymm2,%ymm9 vpunpcklqdq %ymm4,%ymm0,%ymm7 vpunpckhqdq %ymm6,%ymm2,%ymm2 vpunpckhqdq %ymm5,%ymm1,%ymm1 vpunpckhqdq %ymm4,%ymm0,%ymm0 vpminsd %ymm8,%ymm1,%ymm5 vpminsd %ymm9,%ymm2,%ymm6 vpminsd %ymm7,%ymm0,%ymm4 vpminsd %ymm10,%ymm3,%ymm11 vpmaxsd %ymm8,%ymm1,%ymm1 vpmaxsd %ymm7,%ymm0,%ymm0 vpmaxsd %ymm10,%ymm3,%ymm3 vpmaxsd %ymm9,%ymm2,%ymm2 vpunpckldq %ymm2,%ymm6,%ymm7 vpunpckldq %ymm3,%ymm11,%ymm8 vpunpckhdq %ymm2,%ymm6,%ymm2 vpunpckhdq %ymm3,%ymm11,%ymm3 vpunpckldq %ymm1,%ymm5,%ymm6 vpunpckhdq %ymm1,%ymm5,%ymm1 vmovdqu %ymm8,-256(%rax) vpunpckldq %ymm0,%ymm4,%ymm5 vpunpckhdq %ymm0,%ymm4,%ymm0 vmovdqu %ymm3,-224(%rax) vmovdqu %ymm7,-192(%rax) vmovdqu %ymm2,-160(%rax) vmovdqu %ymm6,-128(%rax) vmovdqu %ymm1,-96(%rax) vmovdqu %ymm5,-64(%rax) vmovdqu %ymm0,-32(%rax) jmp .L177 .L203: mov %r13,%rdi mov %r13,%r9 lea -32(%r13),%rdx shr $6,%rdi andq $-64,%r9 salq $8,%rdi sub %r9,%rdx lea 128(%r12,%rdi),%rsi add %r12,%rdi call minmax_vector jmp .L180 .L176: xor %r10d,%r10d cmp $16,%rbx jne .L181 xor %r9d,%r9d .L180: lea 31(%r9),%rax .L179: cmp %r13,%rax jge .L204 vmovdqu -124(%r12,%rax,4),%ymm6 vpminsd -60(%r12,%rax,4),%ymm6,%ymm5 vpmaxsd -60(%r12,%rax,4),%ymm6,%ymm0 vmovdqu -92(%r12,%rax,4),%ymm6 vpminsd -28(%r12,%rax,4),%ymm6,%ymm1 vpmaxsd -28(%r12,%rax,4),%ymm6,%ymm2 vpminsd %ymm1,%ymm5,%ymm3 vpminsd %ymm2,%ymm0,%ymm4 vpmaxsd %ymm1,%ymm5,%ymm5 vpmaxsd %ymm2,%ymm0,%ymm0 vperm2i128 $32,%ymm0,%ymm4,%ymm2 vperm2i128 $32,%ymm5,%ymm3,%ymm1 vperm2i128 $49,%ymm0,%ymm4,%ymm0 vperm2i128 $49,%ymm5,%ymm3,%ymm3 vpminsd %ymm0,%ymm2,%ymm4 vpmaxsd %ymm0,%ymm2,%ymm0 vpminsd %ymm3,%ymm1,%ymm5 vpmaxsd %ymm3,%ymm1,%ymm1 vperm2i128 $32,%ymm0,%ymm4,%ymm2 vperm2i128 $32,%ymm1,%ymm5,%ymm3 vperm2i128 $49,%ymm0,%ymm4,%ymm4 vperm2i128 $49,%ymm1,%ymm5,%ymm5 vpunpcklqdq %ymm5,%ymm3,%ymm1 vpunpcklqdq %ymm4,%ymm2,%ymm0 vpunpckhqdq %ymm5,%ymm3,%ymm3 vpunpckhqdq %ymm4,%ymm2,%ymm2 vpminsd %ymm3,%ymm1,%ymm5 vpmaxsd %ymm3,%ymm1,%ymm1 vpminsd %ymm2,%ymm0,%ymm4 vpmaxsd %ymm2,%ymm0,%ymm0 vpunpckldq %ymm1,%ymm5,%ymm3 vpunpckldq %ymm0,%ymm4,%ymm2 vpunpckhdq %ymm1,%ymm5,%ymm5 vpunpckhdq %ymm0,%ymm4,%ymm4 vpunpcklqdq %ymm5,%ymm3,%ymm1 vpunpcklqdq %ymm4,%ymm2,%ymm0 vpunpckhqdq 
%ymm5,%ymm3,%ymm3 vpunpckhqdq %ymm4,%ymm2,%ymm2 vpminsd %ymm3,%ymm1,%ymm4 vpmaxsd %ymm3,%ymm1,%ymm1 vpminsd %ymm2,%ymm0,%ymm3 vpmaxsd %ymm2,%ymm0,%ymm0 vpunpckldq %ymm1,%ymm4,%ymm5 vpunpckldq %ymm0,%ymm3,%ymm2 vpunpckhdq %ymm1,%ymm4,%ymm1 vpunpckhdq %ymm0,%ymm3,%ymm0 vmovdqu %ymm5,-124(%r12,%rax,4) vmovdqu %ymm1,-92(%r12,%rax,4) vmovdqu %ymm2,-60(%r12,%rax,4) vmovdqu %ymm0,-28(%r12,%rax,4) add $32,%rax jmp .L179 .L204: mov %r13,%r10 xor %edx,%edx lea 0(,%r9,4),%rax sub %r9,%r10 mov %r10,%rdi andq $-32,%r10 shr $5,%rdi cmp %r9,%r13 cmovl %rdx,%r10 salq $7,%rdi add %r9,%r10 cmp %r9,%r13 cmovl %rdx,%rdi lea -16(%r13),%rdx sub %r10,%rdx lea 64(%rax,%rdi),%rsi add %rax,%rdi add %r12,%rsi add %r12,%rdi call minmax_vector .L181: lea 15(%r10),%rax .L183: cmp %r13,%rax jge .L205 vmovdqu -60(%r12,%rax,4),%ymm6 vpmaxsd -28(%r12,%rax,4),%ymm6,%ymm2 vpminsd -28(%r12,%rax,4),%ymm6,%ymm1 vperm2i128 $32,%ymm2,%ymm1,%ymm0 vperm2i128 $49,%ymm2,%ymm1,%ymm1 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vperm2i128 $32,%ymm0,%ymm2,%ymm1 vperm2i128 $49,%ymm0,%ymm2,%ymm2 vpunpcklqdq %ymm2,%ymm1,%ymm0 vpunpckhqdq %ymm2,%ymm1,%ymm1 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vpunpckldq %ymm0,%ymm2,%ymm1 vpunpckhdq %ymm0,%ymm2,%ymm2 vpunpcklqdq %ymm2,%ymm1,%ymm0 vpunpckhqdq %ymm2,%ymm1,%ymm1 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vpunpckldq %ymm0,%ymm2,%ymm1 vpunpckhdq %ymm0,%ymm2,%ymm0 vmovdqu %ymm1,-60(%r12,%rax,4) vmovdqu %ymm0,-28(%r12,%rax,4) add $16,%rax jmp .L183 .L205: mov %r13,%r9 xor %edx,%edx lea 0(,%r10,4),%rcx sub %r10,%r9 mov %r9,%rax andq $-16,%r9 shr $4,%rax cmp %r10,%r13 cmovl %rdx,%r9 salq $6,%rax add %r10,%r9 cmp %r10,%r13 cmovl %rdx,%rax lea -8(%r13),%rdx sub %r9,%rdx lea (%rax,%rcx),%r10 lea 32(%rcx,%rax),%rsi add %r12,%r10 add %r12,%rsi mov %r10,%rdi call minmax_vector lea 7(%r9),%rax cmp %r13,%rax jge .L185 lea 16(,%r9,4),%rax mov (%r10),%ecx add $8,%r9 lea -12(%r12,%rax),%r14 lea (%r12,%rax),%rbx lea 4(%r12,%rax),%r11 mov (%rbx),%edx lea 8(%r12,%rax),%r8 cmp %edx,%ecx mov %ecx,%esi cmovg %edx,%ecx cmovg %esi,%edx mov %ecx,(%r10) mov %edx,(%rbx) mov (%r14),%ecx mov (%r11),%edx cmp %edx,%ecx mov %ecx,%esi cmovg %edx,%ecx cmovg %esi,%edx lea -8(%r12,%rax),%rsi mov %ecx,(%r14) mov %edx,(%r11) mov (%rsi),%ecx mov (%r8),%edx cmp %edx,%ecx mov %ecx,%edi cmovg %edx,%ecx cmovg %edi,%edx lea 12(%r12,%rax),%rdi mov %ecx,(%rsi) lea -4(%r12,%rax),%rcx mov %edx,(%r8) mov (%rcx),%edx mov (%rdi),%eax cmp %eax,%edx mov %edx,%r15d cmovg %eax,%edx cmovg %r15d,%eax mov %edx,(%rcx) mov %eax,(%rdi) mov (%r10),%edx mov (%rsi),%eax cmp %eax,%edx mov %edx,%r15d cmovg %eax,%edx cmovg %r15d,%eax mov %edx,(%r10) mov %eax,(%rsi) mov (%rcx),%eax mov (%r14),%edx cmp %eax,%edx mov %edx,%r15d cmovg %eax,%edx cmovg %r15d,%eax mov %edx,(%r14) mov %eax,(%rcx) mov (%r10),%edx mov (%r14),%eax cmp %eax,%edx mov %edx,%r15d cmovg %eax,%edx cmovg %r15d,%eax mov %edx,(%r10) mov %eax,(%r14) mov (%rsi),%edx mov (%rcx),%eax cmp %eax,%edx mov %edx,%r10d cmovg %eax,%edx cmovg %r10d,%eax mov %edx,(%rsi) mov %eax,(%rcx) mov (%rbx),%edx mov (%r8),%esi mov (%rdi),%ecx cmp %esi,%edx mov %edx,%eax cmovg %esi,%edx cmovg %eax,%esi mov (%r11),%eax cmp %ecx,%eax mov %eax,%r10d cmovg %ecx,%eax cmovg %r10d,%ecx cmp %eax,%edx mov %edx,%r10d cmovg %eax,%edx cmovg %r10d,%eax mov %edx,(%rbx) mov %esi,%edx mov %eax,(%r11) mov %ecx,%eax cmp %eax,%edx mov %edx,%ecx cmovg %eax,%edx cmovg %ecx,%eax mov %edx,(%r8) mov %eax,(%rdi) .L185: lea 4(%r9),%r10 lea -4(%r13),%rdx lea 0(,%r10,4),%rbx sub %r9,%rdx lea -16(%r12,%rbx),%r11 lea 
(%r12,%rbx),%rsi mov %r11,%rdi call minmax_vector lea 3(%r9),%rax cmp %r13,%rax jge .L186 lea -8(%r12,%rbx),%rcx mov (%r11),%edx lea -12(%r12,%rbx),%rdi mov %r10,%r9 mov (%rcx),%eax cmp %eax,%edx mov %edx,%esi cmovg %eax,%edx cmovg %esi,%eax lea -4(%r12,%rbx),%rsi mov %edx,(%r11) mov %eax,(%rcx) mov (%rdi),%edx mov (%rsi),%eax cmp %eax,%edx mov %edx,%r8d cmovg %eax,%edx cmovg %r8d,%eax mov %edx,(%rdi) mov %eax,(%rsi) mov (%rdi),%eax mov (%r11),%edx cmp %eax,%edx mov %edx,%r8d cmovg %eax,%edx cmovg %r8d,%eax mov %edx,(%r11) mov %eax,(%rdi) mov (%rcx),%edx mov (%rsi),%eax cmp %eax,%edx mov %edx,%edi cmovg %eax,%edx cmovg %edi,%eax mov %edx,(%rcx) mov %eax,(%rsi) .L186: lea 2(%r9),%rax cmp %r13,%rax jge .L187 lea 0(,%r9,4),%rax lea (%r12,%rax),%rsi lea 8(%r12,%rax),%rcx mov (%rsi),%edx mov (%rcx),%eax cmp %eax,%edx mov %edx,%edi cmovg %eax,%edx cmovg %edi,%eax mov %edx,(%rsi) mov %eax,(%rcx) .L187: lea 1(%r9),%rax cmp %r13,%rax jge .L147 salq $2,%r9 lea (%r12,%r9),%rsi lea 4(%r12,%r9),%rcx mov (%rsi),%edx mov (%rcx),%eax cmp %eax,%edx mov %edx,%edi cmovg %eax,%edx cmovg %edi,%eax mov %edx,(%rsi) mov %eax,(%rcx) .L147: lea -40(%rbp),%rsp pop %rbx pop %r12 pop %r13 pop %r14 pop %r15 pop %rbp ret .endfn djbsort$avx2,globl,hidden minmax_vector: cmp $7,%rdx jg .L13 .L2: test %rdx,%rdx jle .L15 mov (%rdi),%ecx mov (%rsi),%eax add $4,%rdi add $4,%rsi cmp %eax,%ecx mov %ecx,%r8d cmovg %eax,%ecx cmovg %r8d,%eax decq %rdx mov %ecx,-4(%rdi) mov %eax,-4(%rsi) jmp .L2 .L15: ret .L13: testb $7,%dl je .L6 lea -32(,%rdx,4),%rax andq $-8,%rdx lea (%rdi,%rax),%rcx add %rsi,%rax vmovdqu (%rax),%ymm2 vpminsd (%rcx),%ymm2,%ymm1 vpmaxsd (%rcx),%ymm2,%ymm0 vmovdqu %ymm1,(%rcx) vmovdqu %ymm0,(%rax) .L6: xor %eax,%eax .L7: vmovdqu (%rdi,%rax),%ymm4 vpminsd (%rsi,%rax),%ymm4,%ymm1 vpmaxsd (%rsi,%rax),%ymm4,%ymm0 vmovdqu %ymm1,(%rdi,%rax) vmovdqu %ymm0,(%rsi,%rax) add $32,%rax sub $8,%rdx jne .L7 ret .endfn minmax_vector int32_twostages_32: sub $-128,%rdi .L17: lea -128(%rdi),%rax test %rsi,%rsi jle .L21 .L18: vmovdqu (%rax),%ymm5 vmovdqu 128(%rax),%ymm7 add $32,%rax vpminsd 352(%rax),%ymm7,%ymm3 vpminsd 224(%rax),%ymm5,%ymm2 vpmaxsd 224(%rax),%ymm5,%ymm0 vpmaxsd 352(%rax),%ymm7,%ymm1 vpminsd %ymm3,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm4,-32(%rax) vmovdqu %ymm2,96(%rax) vmovdqu %ymm3,224(%rax) vmovdqu %ymm0,352(%rax) cmp %rax,%rdi jne .L18 add $-128,%rsi add $512,%rdi jmp .L17 .L21: ret .endfn int32_twostages_32 int32_threestages: push %rbp imul $-24,%rdx,%r8 lea 0(,%rdx,8),%rax mov %rsp,%rbp push %r15 push %r14 push %r13 push %r12 push %rbx andq $-32,%rsp sub $64,%rsp mov %rax,56(%rsp) lea 0(,%rdx,4),%rax lea (%rdi,%rax),%rcx mov %rsi,8(%rsp) lea (%rcx,%rax),%rsi lea (%rsi,%rax),%r9 lea (%r9,%rax),%r11 lea (%r11,%rax),%r12 lea (%r12,%rax),%r14 lea (%r14,%rax),%r15 lea (%r15,%r8),%rbx mov %rbx,40(%rsp) add %rax,%rbx lea (%rbx,%rax),%r10 mov %rbx,32(%rsp) lea (%r10,%rax),%rbx lea (%rbx,%rax),%r13 lea 0(%r13,%rax),%r8 mov %r8,24(%rsp) add %r8,%rax mov %rax,16(%rsp) xor %eax,%eax .L23: mov 56(%rsp),%r8 add %rax,%r8 mov %r8,48(%rsp) cmp 8(%rsp),%r8 jg .L28 .L25: cmp %rdx,%rax jge .L29 vmovdqu (%rdi,%rax,4),%ymm3 vmovdqu (%rsi,%rax,4),%ymm6 vpminsd (%r11,%rax,4),%ymm3,%ymm7 vpmaxsd (%r11,%rax,4),%ymm3,%ymm4 vpmaxsd (%r14,%rax,4),%ymm6,%ymm0 vmovdqu (%rcx,%rax,4),%ymm3 vmovdqu (%rsi,%rax,4),%ymm5 vpminsd (%r12,%rax,4),%ymm3,%ymm2 vpmaxsd (%r12,%rax,4),%ymm3,%ymm1 vpminsd (%r14,%rax,4),%ymm5,%ymm5 vmovdqu (%r9,%rax,4),%ymm3 vpminsd (%r15,%rax,4),%ymm3,%ymm6 
vpmaxsd (%r15,%rax,4),%ymm3,%ymm3 vpminsd %ymm5,%ymm7,%ymm8 mov 40(%rsp),%r8 vpmaxsd %ymm5,%ymm7,%ymm5 vpminsd %ymm6,%ymm2,%ymm7 vpminsd %ymm7,%ymm8,%ymm9 vpmaxsd %ymm6,%ymm2,%ymm2 vpminsd %ymm0,%ymm4,%ymm6 vpmaxsd %ymm0,%ymm4,%ymm0 vmovdqu %ymm9,(%rdi,%rax,4) vpminsd %ymm3,%ymm1,%ymm4 vpmaxsd %ymm3,%ymm1,%ymm1 vpmaxsd %ymm7,%ymm8,%ymm3 vpminsd %ymm2,%ymm5,%ymm7 vmovdqu %ymm3,(%r8,%rax,4) mov 32(%rsp),%r8 vpmaxsd %ymm2,%ymm5,%ymm2 vpminsd %ymm4,%ymm6,%ymm5 vpmaxsd %ymm4,%ymm6,%ymm6 vpminsd %ymm1,%ymm0,%ymm4 vmovdqu %ymm7,(%r8,%rax,4) mov 24(%rsp),%r8 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm2,(%r10,%rax,4) vmovdqu %ymm5,(%rbx,%rax,4) vmovdqu %ymm6,0(%r13,%rax,4) vmovdqu %ymm4,(%r8,%rax,4) mov 16(%rsp),%r8 vmovdqu %ymm0,(%r8,%rax,4) add $8,%rax jmp .L25 .L29: mov 48(%rsp),%rax add 56(%rsp),%rdx jmp .L23 .L28: lea -40(%rbp),%rsp pop %rbx pop %r12 pop %r13 pop %r14 pop %r15 pop %rbp ret .endfn int32_threestages merge16_finish: vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vperm2i128 $32,%ymm0,%ymm3,%ymm2 vperm2i128 $49,%ymm0,%ymm3,%ymm0 vpminsd %ymm0,%ymm2,%ymm1 vpmaxsd %ymm0,%ymm2,%ymm0 vpunpcklqdq %ymm0,%ymm1,%ymm2 vpunpckhqdq %ymm0,%ymm1,%ymm0 vpminsd %ymm0,%ymm2,%ymm1 vpmaxsd %ymm0,%ymm2,%ymm2 vpunpckldq %ymm2,%ymm1,%ymm0 vpunpckhdq %ymm2,%ymm1,%ymm1 vpunpcklqdq %ymm1,%ymm0,%ymm3 vpunpckhqdq %ymm1,%ymm0,%ymm0 vpminsd %ymm3,%ymm0,%ymm2 vpmaxsd %ymm3,%ymm0,%ymm0 vpunpckldq %ymm0,%ymm2,%ymm1 vpunpckhdq %ymm0,%ymm2,%ymm0 vperm2i128 $32,%ymm0,%ymm1,%ymm2 vperm2i128 $49,%ymm0,%ymm1,%ymm0 test %esi,%esi je .L31 vpcmpeqd %ymm1,%ymm1,%ymm1 vpxor %ymm1,%ymm2,%ymm2 vpxor %ymm1,%ymm0,%ymm0 .L31: vmovdqu %ymm2,(%rdi) vmovdqu %ymm0,32(%rdi) ret .endfn merge16_finish int32_sort_2power: push %r13 lea 16(%rsp),%r13 andq $-32,%rsp push -8(%r13) push %rbp mov %rsp,%rbp push %r15 push %r14 push %r13 push %r12 mov %rdi,%r12 push %rbx sub $264,%rsp mov %edx,-116(%rbp) cmp $8,%rsi jne .L36 mov 4(%rdi),%edx mov (%rdi),%r8d mov 8(%rdi),%ecx mov 28(%r12),%r9d cmp %r8d,%edx mov %edx,%eax cmovg %r8d,%edx cmovg %eax,%r8d mov 12(%rdi),%eax cmp %ecx,%eax mov %eax,%esi cmovg %ecx,%eax cmovg %esi,%ecx cmp %r8d,%ecx mov %ecx,%esi cmovg %r8d,%ecx cmovg %esi,%r8d cmp %edx,%eax mov %eax,%esi cmovg %edx,%eax cmovg %esi,%edx mov 20(%rdi),%esi mov %edx,%r10d mov 16(%rdi),%edi cmp %r10d,%ecx mov %ecx,%edx cmovg %r10d,%ecx cmovg %edx,%r10d cmp %edi,%esi mov %esi,%edx cmovg %edi,%esi cmovg %edx,%edi mov 24(%r12),%edx cmp %edx,%r9d mov %r9d,%r11d cmovg %edx,%r9d cmovg %r11d,%edx cmp %edi,%edx mov %edx,%r11d cmovg %edi,%edx cmovg %r11d,%edi cmp %esi,%r9d mov %r9d,%r11d cmovg %esi,%r9d cmovg %r11d,%esi cmp %esi,%edx mov %edx,%r11d cmovg %esi,%edx cmovg %r11d,%esi cmp %r8d,%edi mov %edi,%r11d cmovg %r8d,%edi cmovg %r11d,%r8d cmp %ecx,%edx mov %edx,%r11d cmovg %ecx,%edx cmovg %r11d,%ecx mov %r8d,(%r12) cmp %ecx,%edi mov %edi,%r11d cmovg %ecx,%edi cmovg %r11d,%ecx cmp %r10d,%esi mov %esi,%r11d cmovg %r10d,%esi cmovg %r11d,%r10d cmp %eax,%r9d mov %r9d,%r11d cmovg %eax,%r9d cmovg %r11d,%eax cmp %eax,%esi mov %esi,%r11d cmovg %eax,%esi cmovg %r11d,%eax mov %r9d,28(%r12) cmp %r10d,%ecx mov %ecx,%r11d cmovg %r10d,%ecx cmovg %r11d,%r10d cmp %eax,%edi mov %edi,%r11d cmovg %eax,%edi cmovg %r11d,%eax mov %r10d,4(%r12) cmp %esi,%edx mov %edx,%r11d cmovg %esi,%edx cmovg %r11d,%esi mov %ecx,8(%r12) mov %eax,12(%r12) mov %edi,16(%r12) mov %esi,20(%r12) mov %edx,24(%r12) jmp .L35 .L36: mov %rsi,%r15 cmp $16,%rsi jne .L38 vmovdqa .LC0(%rip),%ymm0 vpxor 32(%rdi),%ymm0,%ymm2 vpxor (%rdi),%ymm0,%ymm0 vmovdqa .LC1(%rip),%ymm4 cmp $0,-116(%rbp) 
vpunpckldq %ymm2,%ymm0,%ymm1 vpunpckhdq %ymm2,%ymm0,%ymm0 vpunpcklqdq %ymm0,%ymm1,%ymm3 vpunpckhqdq %ymm0,%ymm1,%ymm1 vpminsd %ymm3,%ymm1,%ymm2 vpmaxsd %ymm3,%ymm1,%ymm1 vpxor %ymm4,%ymm2,%ymm2 vpxor %ymm4,%ymm1,%ymm1 vpunpckldq %ymm1,%ymm2,%ymm0 vpunpckhdq %ymm1,%ymm2,%ymm1 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm1 vpunpcklqdq %ymm1,%ymm3,%ymm2 vpunpckhqdq %ymm1,%ymm3,%ymm3 vpunpckldq %ymm3,%ymm2,%ymm1 vpunpckhdq %ymm3,%ymm2,%ymm2 vpunpcklqdq %ymm2,%ymm1,%ymm0 vpunpckhqdq %ymm2,%ymm1,%ymm1 vpminsd %ymm0,%ymm1,%ymm2 vpmaxsd %ymm0,%ymm1,%ymm1 vpunpckldq %ymm1,%ymm2,%ymm0 vpunpckhdq %ymm1,%ymm2,%ymm1 vpxor %ymm4,%ymm1,%ymm1 vpxor %ymm4,%ymm0,%ymm0 vperm2i128 $32,%ymm1,%ymm0,%ymm3 vperm2i128 $49,%ymm1,%ymm0,%ymm0 vpminsd %ymm3,%ymm0,%ymm2 vpmaxsd %ymm3,%ymm0,%ymm0 vperm2i128 $32,%ymm0,%ymm2,%ymm1 vperm2i128 $49,%ymm0,%ymm2,%ymm0 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm2 vpunpcklqdq %ymm2,%ymm3,%ymm1 vpunpckhqdq %ymm2,%ymm3,%ymm2 vpunpckldq %ymm2,%ymm1,%ymm0 vpunpckhdq %ymm2,%ymm1,%ymm2 vpunpcklqdq %ymm2,%ymm0,%ymm1 vpunpckhqdq %ymm2,%ymm0,%ymm0 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vpunpckldq %ymm0,%ymm2,%ymm1 vpunpckhdq %ymm0,%ymm2,%ymm0 vpunpcklqdq %ymm0,%ymm1,%ymm2 vpunpckhqdq %ymm0,%ymm1,%ymm1 vpcmpeqd %ymm0,%ymm0,%ymm0 je .L39 vpxor %ymm0,%ymm1,%ymm1 jmp .L40 .L39: vpxor %ymm0,%ymm2,%ymm2 .L40: mov -116(%rbp),%esi vmovdqa %ymm2,%ymm0 mov %r12,%rdi jmp .L134 .L38: cmp $32,%rsi jne .L41 mov $1,%edx mov $16,%esi lea 64(%r12),%r13 call int32_sort_2power xor %edx,%edx mov $16,%esi mov %r13,%rdi call int32_sort_2power cmp $0,-116(%rbp) vmovdqu (%r12),%ymm4 vmovdqu 32(%r12),%ymm1 vmovdqu 64(%r12),%ymm2 vmovdqu 96(%r12),%ymm3 je .L42 vpcmpeqd %ymm0,%ymm0,%ymm0 vpxor %ymm0,%ymm4,%ymm4 vpxor %ymm0,%ymm1,%ymm1 vpxor %ymm0,%ymm2,%ymm2 vpxor %ymm0,%ymm3,%ymm3 .L42: mov -116(%rbp),%esi vpmaxsd %ymm1,%ymm3,%ymm5 vpminsd %ymm4,%ymm2,%ymm0 mov %r12,%rdi vpmaxsd %ymm4,%ymm2,%ymm4 vpminsd %ymm1,%ymm3,%ymm1 vmovdqa %ymm5,-80(%rbp) vmovdqa %ymm4,-112(%rbp) call merge16_finish vmovdqa -80(%rbp),%ymm5 mov -116(%rbp),%esi mov %r13,%rdi vmovdqa -112(%rbp),%ymm4 vmovdqa %ymm5,%ymm1 vmovdqa %ymm4,%ymm0 .L134: add $264,%rsp pop %rbx pop %r12 pop %r13 pop %r14 pop %r15 pop %rbp lea -16(%r13),%rsp pop %r13 jmp merge16_finish .L41: mov %rsi,%rax sar $3,%rax mov %rax,-80(%rbp) lea 0(,%rax,4),%r13 salq $3,%rax imul $-20,-80(%rbp),%rdx lea (%rdi,%rax),%rdi lea (%rdi,%rax),%rsi lea (%rsi,%rax),%rcx add %rcx,%rdx lea (%rdx,%rax),%r9 lea (%r9,%rax),%r8 add %r8,%rax mov %rax,-136(%rbp) mov %rax,%r10 xor %eax,%eax .L43: cmp -80(%rbp),%rax jge .L135 add $32,%rdi add $32,%rsi add $32,%rcx add $32,%rdx vmovdqu (%r12,%rax,4),%ymm5 add $32,%r9 add $32,%r8 add $32,%r10 vpminsd -32(%rsi),%ymm5,%ymm4 vpmaxsd -32(%rsi),%ymm5,%ymm2 vmovdqu -32(%rdi),%ymm5 vpminsd -32(%rcx),%ymm5,%ymm1 vpmaxsd -32(%rcx),%ymm5,%ymm0 vpminsd %ymm2,%ymm0,%ymm3 vpmaxsd %ymm2,%ymm0,%ymm0 vpminsd %ymm4,%ymm1,%ymm2 vpmaxsd %ymm4,%ymm1,%ymm1 vmovdqu %ymm0,(%r12,%rax,4) add $8,%rax vpminsd %ymm1,%ymm3,%ymm4 vpmaxsd %ymm1,%ymm3,%ymm1 vmovdqu %ymm4,-32(%rdi) vmovdqu %ymm1,-32(%rsi) vmovdqu %ymm2,-32(%rcx) vmovdqu -32(%r8),%ymm5 vmovdqu -32(%r10),%ymm6 vpminsd -32(%rdx),%ymm5,%ymm1 vpminsd -32(%r9),%ymm6,%ymm3 vpmaxsd -32(%r9),%ymm6,%ymm2 vpmaxsd -32(%rdx),%ymm5,%ymm0 vpminsd %ymm3,%ymm1,%ymm4 vpmaxsd %ymm3,%ymm1,%ymm1 vpminsd %ymm2,%ymm0,%ymm3 vpmaxsd %ymm2,%ymm0,%ymm0 vmovdqu %ymm4,-32(%rdx) vpminsd %ymm1,%ymm3,%ymm2 vpmaxsd %ymm1,%ymm3,%ymm1 vmovdqu %ymm1,-32(%r9) vmovdqu %ymm2,-32(%r8) vmovdqu %ymm0,-32(%r10) jmp .L43 .L135: 
imul $-24,-80(%rbp),%rax mov %rax,-128(%rbp) cmp $127,%r15 jg .L105 .L63: lea (%r12,%r15,4),%rax vmovdqa .LC1(%rip),%ymm10 movl $3,-272(%rbp) mov $4,%r14d mov %rax,-144(%rbp) mov %r15,%rax vmovdqa .LC3(%rip),%ymm11 sar $4,%rax vmovdqa .LC2(%rip),%ymm12 mov %rax,-112(%rbp) mov -136(%rbp),%rax add -128(%rbp),%rax mov %rax,-200(%rbp) add %r13,%rax mov %rax,-192(%rbp) add %r13,%rax mov %rax,-184(%rbp) add %r13,%rax mov %rax,-176(%rbp) add %r13,%rax mov %rax,-168(%rbp) add %r13,%rax mov %rax,-160(%rbp) add %r13,%rax mov %rax,-152(%rbp) jmp .L46 .L105: xor %eax,%eax vpcmpeqd %ymm0,%ymm0,%ymm0 .L45: vpxor 64(%r12,%rax,4),%ymm0,%ymm1 vpxor (%r12,%rax,4),%ymm0,%ymm2 vmovdqu %ymm1,64(%r12,%rax,4) vmovdqu %ymm2,(%r12,%rax,4) add $32,%rax cmp %rax,%r15 jg .L45 mov -136(%rbp),%r14 add -128(%rbp),%r14 mov $8,%ebx vpcmpeqd %ymm10,%ymm10,%ymm10 lea (%r14,%r13),%rax mov %rax,-296(%rbp) add %r13,%rax lea (%rax,%r13),%r11 mov %rax,-176(%rbp) lea (%r11,%r13),%rax mov %rax,-288(%rbp) add %r13,%rax mov %rax,-144(%rbp) add %r13,%rax mov %rax,-112(%rbp) add -128(%rbp),%rax mov %rax,-200(%rbp) add %r13,%rax mov %rax,-192(%rbp) add %r13,%rax mov %rax,-184(%rbp) add %r13,%rax mov %rax,-168(%rbp) add %r13,%rax mov %rax,-160(%rbp) add %r13,%rax mov %rax,-152(%rbp) add %r13,%rax mov %rax,-280(%rbp) .L64: mov %rbx,%rcx sarq %rcx .L47: cmp $127,%rcx jle .L136 mov %rcx,%rdx mov %r15,%rsi mov %r12,%rdi mov %r11,-272(%rbp) sar $2,%rdx mov %rcx,-240(%rbp) call int32_threestages mov -240(%rbp),%rcx mov -272(%rbp),%r11 vpcmpeqd %ymm10,%ymm10,%ymm10 sar $3,%rcx jmp .L47 .L136: cmp $64,%rcx jne .L49 mov %r15,%rsi mov %r12,%rdi mov %r11,-240(%rbp) call int32_twostages_32 mov -240(%rbp),%r11 vpcmpeqd %ymm10,%ymm10,%ymm10 .L54: xor %eax,%eax jmp .L50 .L49: cmp $32,%rcx jne .L51 mov %r12,%rax xor %edx,%edx .L52: vmovdqu (%rax),%ymm7 vmovdqu 32(%rax),%ymm5 add $64,%rdx add $256,%rax vpminsd -128(%rax),%ymm7,%ymm8 vpmaxsd -128(%rax),%ymm7,%ymm4 vpminsd -96(%rax),%ymm5,%ymm1 vpmaxsd -96(%rax),%ymm5,%ymm0 vmovdqu -192(%rax),%ymm6 vmovdqu -160(%rax),%ymm7 vpminsd -64(%rax),%ymm6,%ymm5 vpmaxsd -32(%rax),%ymm7,%ymm2 vpmaxsd -64(%rax),%ymm6,%ymm3 vmovdqu -160(%rax),%ymm6 vpminsd -32(%rax),%ymm6,%ymm6 vpminsd %ymm5,%ymm8,%ymm7 vpmaxsd %ymm5,%ymm8,%ymm5 vpminsd %ymm6,%ymm1,%ymm8 vpmaxsd %ymm6,%ymm1,%ymm1 vpminsd %ymm3,%ymm4,%ymm6 vpmaxsd %ymm3,%ymm4,%ymm3 vpminsd %ymm2,%ymm0,%ymm4 vpmaxsd %ymm2,%ymm0,%ymm0 vpminsd %ymm8,%ymm7,%ymm9 vpmaxsd %ymm8,%ymm7,%ymm2 vpminsd %ymm1,%ymm5,%ymm7 vpmaxsd %ymm1,%ymm5,%ymm1 vmovdqu %ymm9,-256(%rax) vpminsd %ymm4,%ymm6,%ymm5 vpmaxsd %ymm4,%ymm6,%ymm6 vmovdqu %ymm2,-224(%rax) vpminsd %ymm0,%ymm3,%ymm4 vpmaxsd %ymm0,%ymm3,%ymm3 vmovdqu %ymm5,-128(%rax) vmovdqu %ymm7,-192(%rax) vmovdqu %ymm1,-160(%rax) vmovdqu %ymm6,-96(%rax) vmovdqu %ymm4,-64(%rax) vmovdqu %ymm3,-32(%rax) cmp %rdx,%r15 jg .L52 .L56: lea (%rbx,%rbx),%rdx xor %ecx,%ecx cmp -80(%rbp),%rdx setne %al sete %cl mov %rdx,%r8 xor %esi,%esi movzbl %al,%eax mov %eax,-204(%rbp) jmp .L53 .L51: cmp $16,%rcx jne .L131 jmp .L54 .L50: vmovdqu (%r12,%rax,4),%ymm5 vmovdqu 32(%r12,%rax,4),%ymm6 vpminsd 64(%r12,%rax,4),%ymm5,%ymm2 vpminsd 96(%r12,%rax,4),%ymm6,%ymm3 vpmaxsd 64(%r12,%rax,4),%ymm5,%ymm0 vpmaxsd 96(%r12,%rax,4),%ymm6,%ymm1 vpminsd %ymm3,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm4,(%r12,%rax,4) vmovdqu %ymm2,32(%r12,%rax,4) vmovdqu %ymm3,64(%r12,%rax,4) vmovdqu %ymm0,96(%r12,%rax,4) add $32,%rax cmp %rax,%r15 jg .L50 jmp .L56 .L131: cmp $8,%rcx jne .L56 xor %eax,%eax .L57: vmovdqu 
32(%r12,%rax,4),%ymm7 vpmaxsd (%r12,%rax,4),%ymm7,%ymm0 vpminsd (%r12,%rax,4),%ymm7,%ymm1 vmovdqu %ymm0,32(%r12,%rax,4) vmovdqu %ymm1,(%r12,%rax,4) add $16,%rax cmp %rax,%r15 jg .L57 jmp .L56 .L59: mov -176(%rbp),%r10 vmovdqu (%r12,%rax,4),%ymm5 vpminsd (%r14,%rax,4),%ymm5,%ymm6 vpmaxsd (%r14,%rax,4),%ymm5,%ymm15 vmovdqu (%r10,%rax,4),%ymm5 mov -296(%rbp),%r10 vmovdqu (%r10,%rax,4),%ymm7 mov -288(%rbp),%r10 vmovdqa %ymm5,-240(%rbp) vmovdqa %ymm7,-272(%rbp) vmovdqu (%r10,%rax,4),%ymm7 mov -112(%rbp),%r10 vmovdqa -272(%rbp),%ymm5 vpminsd -240(%rbp),%ymm5,%ymm1 vpmaxsd -240(%rbp),%ymm5,%ymm5 vmovdqa %ymm7,-240(%rbp) vmovdqa -240(%rbp),%ymm4 vpmaxsd (%r11,%rax,4),%ymm4,%ymm0 vmovdqu (%r10,%rax,4),%ymm4 vpminsd %ymm1,%ymm6,%ymm8 mov -144(%rbp),%r10 vmovdqa -240(%rbp),%ymm7 vpmaxsd %ymm1,%ymm6,%ymm6 vpminsd %ymm5,%ymm15,%ymm1 vmovdqa %ymm4,-240(%rbp) vpminsd (%r11,%rax,4),%ymm7,%ymm7 vpmaxsd %ymm5,%ymm15,%ymm15 vmovdqu (%r10,%rax,4),%ymm4 vmovdqa %ymm4,-272(%rbp) vmovdqa -272(%rbp),%ymm4 vpminsd -240(%rbp),%ymm4,%ymm3 vpmaxsd -240(%rbp),%ymm4,%ymm4 vpminsd %ymm3,%ymm7,%ymm2 vpmaxsd %ymm3,%ymm7,%ymm3 vpminsd %ymm4,%ymm0,%ymm7 vpmaxsd %ymm4,%ymm0,%ymm0 vpminsd %ymm2,%ymm8,%ymm14 vpminsd %ymm7,%ymm1,%ymm13 vpminsd %ymm3,%ymm6,%ymm12 vpminsd %ymm0,%ymm15,%ymm11 vmovdqa %ymm14,%ymm9 vpmaxsd %ymm3,%ymm6,%ymm6 vpmaxsd %ymm2,%ymm8,%ymm2 vmovdqa %ymm13,%ymm8 vpmaxsd %ymm7,%ymm1,%ymm1 vpmaxsd %ymm0,%ymm15,%ymm0 vmovdqa %ymm6,-240(%rbp) vmovdqa %ymm2,%ymm5 vmovdqa -240(%rbp),%ymm3 vmovdqa %ymm1,%ymm4 vmovdqa %ymm12,%ymm7 vmovdqa %ymm11,%ymm6 vmovdqa %ymm0,%ymm15 test %ecx,%ecx je .L58 vpxor %ymm14,%ymm10,%ymm9 vpxor %ymm13,%ymm10,%ymm8 vpxor %ymm12,%ymm10,%ymm7 vpxor %ymm11,%ymm10,%ymm6 vpxor %ymm2,%ymm10,%ymm5 vpxor %ymm1,%ymm10,%ymm4 vpxor %ymm3,%ymm10,%ymm3 vpxor %ymm0,%ymm10,%ymm15 .L58: mov -200(%rbp),%r10 vmovdqu %ymm9,(%r12,%rax,4) vmovdqu %ymm8,(%r10,%rax,4) mov -192(%rbp),%r10 vmovdqu %ymm7,(%r10,%rax,4) mov -184(%rbp),%r10 vmovdqu %ymm6,(%r10,%rax,4) mov -168(%rbp),%r10 vmovdqu %ymm5,(%r10,%rax,4) mov -160(%rbp),%r10 vmovdqu %ymm4,(%r10,%rax,4) mov -152(%rbp),%r10 vmovdqu %ymm3,(%r10,%rax,4) mov -280(%rbp),%r10 vmovdqu %ymm15,(%r10,%rax,4) add $8,%rax .L60: cmp %rax,%rdi jg .L59 xor $1,%ecx lea (%rdx,%r9),%rdi .L62: mov %rdi,%r9 sub %rbx,%r9 mov %r9,%rax cmp %r9,%r8 jg .L60 xor -204(%rbp),%ecx add %rdx,%rsi add %rdx,%r8 .L53: cmp -80(%rbp),%rsi jge .L61 lea (%rsi,%rbx),%rdi jmp .L62 .L61: salq $4,%rbx cmp %r15,%rbx je .L63 mov %rdx,%rbx jmp .L64 .L46: cmp $4,%r14 jne .L132 mov %r12,%rax .L65: cmp -144(%rbp),%rax je .L72 vpxor 32(%rax),%ymm12,%ymm0 vpxor (%rax),%ymm12,%ymm1 add $64,%rax vmovdqu %ymm1,-64(%rax) vmovdqu %ymm0,-32(%rax) jmp .L65 .L72: mov -112(%rbp),%rbx jmp .L68 .L132: mov %r12,%rax cmp $2,%r14 jne .L70 .L69: cmp -144(%rbp),%rax je .L72 vpxor 32(%rax),%ymm10,%ymm2 vpxor (%rax),%ymm10,%ymm1 add $64,%rax vperm2i128 $32,%ymm2,%ymm1,%ymm0 vperm2i128 $49,%ymm2,%ymm1,%ymm1 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vperm2i128 $32,%ymm0,%ymm2,%ymm1 vperm2i128 $49,%ymm0,%ymm2,%ymm0 vmovdqu %ymm1,-64(%rax) vmovdqu %ymm0,-32(%rax) jmp .L69 .L70: cmp -144(%rbp),%rax je .L72 vpxor 32(%rax),%ymm11,%ymm2 vpxor (%rax),%ymm11,%ymm1 add $64,%rax vperm2i128 $32,%ymm2,%ymm1,%ymm0 vperm2i128 $49,%ymm2,%ymm1,%ymm1 vpunpcklqdq %ymm1,%ymm0,%ymm2 vpunpckhqdq %ymm1,%ymm0,%ymm0 vpminsd %ymm0,%ymm2,%ymm1 vpmaxsd %ymm0,%ymm2,%ymm2 vpunpcklqdq %ymm2,%ymm1,%ymm0 vpunpckhqdq %ymm2,%ymm1,%ymm1 vpminsd %ymm1,%ymm0,%ymm2 vpmaxsd %ymm1,%ymm0,%ymm0 vperm2i128 $32,%ymm0,%ymm2,%ymm1 vperm2i128 
$49,%ymm0,%ymm2,%ymm0 vmovdqu %ymm1,-64(%rax) vmovdqu %ymm0,-32(%rax) jmp .L70 .L137: cmp $32,%rbx jne .L75 .L74: mov %rbx,%rdx mov %r15,%rsi mov %r12,%rdi sar $3,%rbx sar $2,%rdx call int32_threestages vmovdqa .LC2(%rip),%ymm12 vmovdqa .LC3(%rip),%ymm11 vmovdqa .LC1(%rip),%ymm10 .L68: cmp $127,%rbx jle .L137 jmp .L74 .L139: sar $2,%rbx .L75: cmp $15,%rbx jle .L138 mov %rbx,%rcx xor %esi,%esi sarq %rcx imul $-8,%rcx,%rdi lea 0(,%rcx,4),%rdx lea (%r12,%rdx),%r11 lea (%r11,%rdx),%r10 lea (%r10,%rdx),%r8 lea (%rdi,%r8),%rax lea (%rax,%rdx),%r9 mov %rax,-136(%rbp) lea (%r9,%rdx),%rax mov %rax,-240(%rbp) .L76: cmp %r15,%rsi jge .L139 mov %rsi,%rax .L78: cmp %rcx,%rax jge .L140 vmovdqu (%r12,%rax,4),%ymm6 vmovdqu (%r11,%rax,4),%ymm5 vpminsd (%r10,%rax,4),%ymm6,%ymm2 vpminsd (%r8,%rax,4),%ymm5,%ymm3 mov -136(%rbp),%rdi vpmaxsd (%r10,%rax,4),%ymm6,%ymm0 vpmaxsd (%r8,%rax,4),%ymm5,%ymm1 vpminsd %ymm3,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vmovdqu %ymm4,(%r12,%rax,4) vmovdqu %ymm2,(%rdi,%rax,4) mov -240(%rbp),%rdi vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm3,(%r9,%rax,4) vmovdqu %ymm0,(%rdi,%rax,4) add $8,%rax jmp .L78 .L140: add %rdx,%rsi add %rdx,%rcx jmp .L76 .L138: cmp $8,%rbx je .L109 .L83: mov -152(%rbp),%rdx mov -160(%rbp),%rcx xor %eax,%eax mov -168(%rbp),%rsi mov -176(%rbp),%rdi mov -184(%rbp),%r8 mov -192(%rbp),%r9 mov -200(%rbp),%r10 jmp .L81 .L109: xor %eax,%eax .L80: cmp %r15,%rax jge .L83 vmovdqu (%r12,%rax,4),%ymm5 vpminsd 32(%r12,%rax,4),%ymm5,%ymm1 vpmaxsd 32(%r12,%rax,4),%ymm5,%ymm0 vmovdqu %ymm1,(%r12,%rax,4) vmovdqu %ymm0,32(%r12,%rax,4) add $16,%rax jmp .L80 .L81: cmp -80(%rbp),%rax jge .L141 vmovdqu (%rdi),%ymm7 add $32,%r10 add $32,%r9 add $32,%r8 add $32,%rdi add $32,%rsi add $32,%rcx add $32,%rdx vmovdqu (%r12,%rax,4),%ymm5 vmovdqu -32(%r9),%ymm6 vpminsd -32(%r10),%ymm5,%ymm3 vpmaxsd -32(%r10),%ymm5,%ymm1 vpminsd -32(%r8),%ymm6,%ymm2 vpmaxsd -32(%r8),%ymm6,%ymm0 vpminsd -32(%rsi),%ymm7,%ymm7 vmovdqu -32(%rcx),%ymm5 vmovdqu -32(%rdi),%ymm6 vpmaxsd -32(%rdx),%ymm5,%ymm4 vpminsd %ymm2,%ymm3,%ymm9 vpmaxsd -32(%rsi),%ymm6,%ymm8 vpminsd -32(%rdx),%ymm5,%ymm6 vpminsd %ymm0,%ymm1,%ymm13 vpmaxsd %ymm2,%ymm3,%ymm2 vpminsd %ymm6,%ymm7,%ymm5 vpminsd %ymm4,%ymm8,%ymm3 vpmaxsd %ymm6,%ymm7,%ymm6 vpmaxsd %ymm0,%ymm1,%ymm0 vpmaxsd %ymm4,%ymm8,%ymm4 vpminsd %ymm5,%ymm9,%ymm1 vpminsd %ymm6,%ymm2,%ymm8 vpminsd %ymm3,%ymm13,%ymm7 vmovdqu %ymm1,(%r12,%rax,4) add $8,%rax vpmaxsd %ymm6,%ymm2,%ymm2 vpmaxsd %ymm5,%ymm9,%ymm5 vmovdqu %ymm7,-32(%r10) vpminsd %ymm4,%ymm0,%ymm6 vpmaxsd %ymm3,%ymm13,%ymm3 vmovdqu %ymm8,-32(%r9) vpmaxsd %ymm4,%ymm0,%ymm0 vmovdqu %ymm6,-32(%r8) vmovdqu %ymm5,-32(%rdi) vmovdqu %ymm3,-32(%rsi) vmovdqu %ymm2,-32(%rcx) vmovdqu %ymm0,-32(%rdx) jmp .L81 .L141: sarq %r14 decl -272(%rbp) jne .L46 mov %r12,%rax xor %edx,%edx vpcmpeqd %ymm5,%ymm5,%ymm5 .L85: cmp %r15,%rdx jge .L89 vmovdqu (%rax),%ymm7 vpunpckldq 32(%rax),%ymm7,%ymm12 vpunpckhdq 32(%rax),%ymm7,%ymm6 vmovdqu 64(%rax),%ymm7 vpunpckldq 96(%rax),%ymm7,%ymm2 vpunpckhdq 96(%rax),%ymm7,%ymm4 vmovdqu 128(%rax),%ymm7 vpunpckldq 160(%rax),%ymm7,%ymm1 vpunpckhdq 160(%rax),%ymm7,%ymm0 vpunpcklqdq %ymm2,%ymm12,%ymm8 vpunpcklqdq %ymm4,%ymm6,%ymm9 cmp $0,-116(%rbp) vmovdqu 192(%rax),%ymm7 vpunpckhqdq %ymm2,%ymm12,%ymm12 vpunpckhqdq %ymm4,%ymm6,%ymm4 vpunpckldq 224(%rax),%ymm7,%ymm10 vpunpckhdq 224(%rax),%ymm7,%ymm3 vpunpcklqdq %ymm10,%ymm1,%ymm11 vpunpckhqdq %ymm10,%ymm1,%ymm1 vpunpcklqdq %ymm3,%ymm0,%ymm7 vpunpckhqdq %ymm3,%ymm0,%ymm0 je .L86 vpxor %ymm5,%ymm12,%ymm12 vpxor %ymm5,%ymm4,%ymm4 vpxor 
%ymm5,%ymm1,%ymm1 vpxor %ymm5,%ymm0,%ymm0 jmp .L87 .L86: vpxor %ymm5,%ymm8,%ymm8 vpxor %ymm5,%ymm9,%ymm9 vpxor %ymm5,%ymm11,%ymm11 vpxor %ymm5,%ymm7,%ymm7 .L87: vperm2i128 $32,%ymm11,%ymm8,%ymm3 vperm2i128 $32,%ymm1,%ymm12,%ymm6 vperm2i128 $32,%ymm7,%ymm9,%ymm10 add $64,%rdx vperm2i128 $32,%ymm0,%ymm4,%ymm13 vperm2i128 $49,%ymm11,%ymm8,%ymm11 vperm2i128 $49,%ymm7,%ymm9,%ymm9 add $256,%rax vperm2i128 $49,%ymm1,%ymm12,%ymm1 vperm2i128 $49,%ymm0,%ymm4,%ymm0 vpmaxsd %ymm6,%ymm3,%ymm2 vpminsd %ymm6,%ymm3,%ymm4 vpminsd %ymm1,%ymm11,%ymm7 vpmaxsd %ymm13,%ymm10,%ymm3 vpminsd %ymm13,%ymm10,%ymm8 vpmaxsd %ymm1,%ymm11,%ymm1 vpminsd %ymm0,%ymm9,%ymm10 vpmaxsd %ymm0,%ymm9,%ymm0 vpminsd %ymm8,%ymm4,%ymm11 vpminsd %ymm3,%ymm2,%ymm9 vpmaxsd %ymm8,%ymm4,%ymm8 vpminsd %ymm10,%ymm7,%ymm6 vpmaxsd %ymm10,%ymm7,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm0,%ymm1,%ymm3 vpmaxsd %ymm0,%ymm1,%ymm1 vpminsd %ymm6,%ymm11,%ymm10 vpmaxsd %ymm6,%ymm11,%ymm0 vpminsd %ymm3,%ymm9,%ymm7 vpmaxsd %ymm3,%ymm9,%ymm6 vpminsd %ymm4,%ymm8,%ymm3 vpminsd %ymm1,%ymm2,%ymm9 vpmaxsd %ymm4,%ymm8,%ymm4 vpunpckldq %ymm7,%ymm10,%ymm8 vpmaxsd %ymm1,%ymm2,%ymm2 vpunpckhdq %ymm7,%ymm10,%ymm7 vpunpckldq %ymm9,%ymm3,%ymm1 vpunpckhdq %ymm9,%ymm3,%ymm3 vpunpckldq %ymm6,%ymm0,%ymm9 vpunpckhdq %ymm6,%ymm0,%ymm6 vpunpckldq %ymm2,%ymm4,%ymm0 vpunpckhdq %ymm2,%ymm4,%ymm2 vpunpcklqdq %ymm3,%ymm7,%ymm10 vpunpcklqdq %ymm1,%ymm8,%ymm4 vpunpcklqdq %ymm0,%ymm9,%ymm13 vpunpckhqdq %ymm1,%ymm8,%ymm8 vpunpckhqdq %ymm3,%ymm7,%ymm3 vpunpckhqdq %ymm0,%ymm9,%ymm1 vpunpcklqdq %ymm2,%ymm6,%ymm7 vpunpckhqdq %ymm2,%ymm6,%ymm0 vperm2i128 $32,%ymm13,%ymm4,%ymm12 vperm2i128 $32,%ymm1,%ymm8,%ymm11 vperm2i128 $32,%ymm0,%ymm3,%ymm6 vperm2i128 $32,%ymm7,%ymm10,%ymm9 vperm2i128 $49,%ymm13,%ymm4,%ymm4 vmovdqu %ymm12,-256(%rax) vperm2i128 $49,%ymm1,%ymm8,%ymm1 vperm2i128 $49,%ymm7,%ymm10,%ymm2 vperm2i128 $49,%ymm0,%ymm3,%ymm0 vmovdqu %ymm11,-224(%rax) vmovdqu %ymm9,-192(%rax) vmovdqu %ymm6,-160(%rax) vmovdqu %ymm4,-128(%rax) vmovdqu %ymm1,-96(%rax) vmovdqu %ymm2,-64(%rax) vmovdqu %ymm0,-32(%rax) jmp .L85 .L142: cmp $32,-112(%rbp) jne .L94 .L93: mov -112(%rbp),%rcx sar $2,%rcx lea 0(,%rcx,4),%rdx lea 0(,%rcx,8),%rax mov %rcx,-136(%rbp) lea (%r12,%rdx),%r9 mov %rax,-184(%rbp) imul $-24,%rcx,%rax lea (%r9,%rdx),%r14 lea (%r14,%rdx),%rsi lea (%rsi,%rdx),%rbx lea (%rbx,%rdx),%r10 lea (%r10,%rdx),%r8 lea (%r8,%rdx),%rdi add %rdi,%rax mov %rax,-176(%rbp) add %rdx,%rax mov %rax,-168(%rbp) add %rdx,%rax lea (%rax,%rdx),%r11 mov %rax,-160(%rbp) lea (%r11,%rdx),%rax mov %rax,-200(%rbp) add %rdx,%rax add %rax,%rdx mov %rax,-144(%rbp) mov %rdx,-192(%rbp) .L90: mov -136(%rbp),%rax sub %rcx,%rax cmp %rax,%r15 jg .L92 sarq $3,-112(%rbp) .L89: cmp $127,-112(%rbp) jle .L142 jmp .L93 .L92: cmp -136(%rbp),%rax jge .L143 vmovdqu (%r12,%rax,4),%ymm6 vpminsd (%rbx,%rax,4),%ymm6,%ymm7 vpmaxsd (%rbx,%rax,4),%ymm6,%ymm4 vmovdqu (%r9,%rax,4),%ymm6 vpminsd (%r10,%rax,4),%ymm6,%ymm1 vpmaxsd (%r10,%rax,4),%ymm6,%ymm0 vmovdqu (%r14,%rax,4),%ymm6 vpminsd (%r8,%rax,4),%ymm6,%ymm5 vpmaxsd (%r8,%rax,4),%ymm6,%ymm3 vmovdqu (%rsi,%rax,4),%ymm6 vpminsd (%rdi,%rax,4),%ymm6,%ymm6 vpminsd %ymm5,%ymm7,%ymm9 vmovdqu (%rsi,%rax,4),%ymm2 vpmaxsd %ymm5,%ymm7,%ymm5 mov -176(%rbp),%rdx vpminsd %ymm3,%ymm4,%ymm8 vpminsd %ymm6,%ymm1,%ymm7 vpmaxsd %ymm3,%ymm4,%ymm3 vpminsd %ymm7,%ymm9,%ymm10 vpmaxsd %ymm7,%ymm9,%ymm4 vpmaxsd (%rdi,%rax,4),%ymm2,%ymm2 vpmaxsd %ymm6,%ymm1,%ymm1 vmovdqu %ymm10,(%r12,%rax,4) vmovdqu %ymm4,(%rdx,%rax,4) mov -168(%rbp),%rdx vpminsd %ymm1,%ymm5,%ymm9 vpmaxsd %ymm1,%ymm5,%ymm1 vpminsd %ymm2,%ymm0,%ymm6 
vpmaxsd %ymm2,%ymm0,%ymm0 vmovdqu %ymm9,(%rdx,%rax,4) vpminsd %ymm6,%ymm8,%ymm7 vpmaxsd %ymm6,%ymm8,%ymm2 mov -160(%rbp),%rdx vpminsd %ymm0,%ymm3,%ymm5 vpmaxsd %ymm0,%ymm3,%ymm3 vmovdqu %ymm1,(%rdx,%rax,4) mov -200(%rbp),%rdx vmovdqu %ymm7,(%r11,%rax,4) vmovdqu %ymm2,(%rdx,%rax,4) mov -144(%rbp),%rdx vmovdqu %ymm5,(%rdx,%rax,4) mov -192(%rbp),%rdx vmovdqu %ymm3,(%rdx,%rax,4) add $8,%rax jmp .L92 .L143: mov -184(%rbp),%rdx add %rdx,-136(%rbp) jmp .L90 .L145: sarq $2,-112(%rbp) .L94: cmp $15,-112(%rbp) jle .L144 mov -112(%rbp),%rcx xor %esi,%esi sarq %rcx imul $-8,%rcx,%rdi lea 0(,%rcx,4),%rdx lea (%r12,%rdx),%r11 lea (%r11,%rdx),%r10 lea (%r10,%rdx),%r8 add %r8,%rdi lea (%rdi,%rdx),%r9 lea (%r9,%rdx),%rbx .L95: cmp %r15,%rsi jge .L145 mov %rsi,%rax .L97: cmp %rcx,%rax jge .L146 vmovdqu (%r12,%rax,4),%ymm5 vpminsd (%r10,%rax,4),%ymm5,%ymm2 vpmaxsd (%r10,%rax,4),%ymm5,%ymm0 vmovdqu (%r11,%rax,4),%ymm5 vpminsd (%r8,%rax,4),%ymm5,%ymm3 vpmaxsd (%r8,%rax,4),%ymm5,%ymm1 vpminsd %ymm3,%ymm2,%ymm4 vpmaxsd %ymm3,%ymm2,%ymm2 vpminsd %ymm1,%ymm0,%ymm3 vpmaxsd %ymm1,%ymm0,%ymm0 vmovdqu %ymm4,(%r12,%rax,4) vmovdqu %ymm2,(%rdi,%rax,4) vmovdqu %ymm3,(%r9,%rax,4) vmovdqu %ymm0,(%rbx,%rax,4) add $8,%rax jmp .L97 .L146: add %rdx,%rsi add %rdx,%rcx jmp .L95 .L144: cmp $8,-112(%rbp) je .L111 .L102: mov -152(%rbp),%rdx add -128(%rbp),%rdx xor %ecx,%ecx vpcmpeqd %ymm6,%ymm6,%ymm6 lea (%rdx,%r13),%r10 lea (%r10,%r13),%r9 lea (%r9,%r13),%r8 lea (%r8,%r13),%rdi lea (%rdi,%r13),%rsi lea (%rsi,%r13),%rax jmp .L100 .L111: xor %eax,%eax .L99: cmp %r15,%rax jge .L102 vmovdqu (%r12,%rax,4),%ymm5 vpminsd 32(%r12,%rax,4),%ymm5,%ymm1 vpmaxsd 32(%r12,%rax,4),%ymm5,%ymm0 vmovdqu %ymm1,(%r12,%rax,4) vmovdqu %ymm0,32(%r12,%rax,4) add $16,%rax jmp .L99 .L104: vmovdqu (%r10),%ymm7 vmovdqu (%r12,%rcx,4),%ymm4 vpminsd (%r9),%ymm7,%ymm3 vpminsd (%rdx),%ymm4,%ymm5 vpmaxsd (%r9),%ymm7,%ymm2 vpmaxsd (%rdx),%ymm4,%ymm4 vmovdqu (%r8),%ymm7 vmovdqu (%rsi),%ymm14 vpminsd %ymm3,%ymm5,%ymm11 vpmaxsd %ymm3,%ymm5,%ymm3 vpminsd (%rdi),%ymm7,%ymm1 vpminsd %ymm2,%ymm4,%ymm10 cmp $0,-116(%rbp) vpmaxsd (%rdi),%ymm7,%ymm0 vmovdqu (%rsi),%ymm7 vpmaxsd %ymm2,%ymm4,%ymm2 vpminsd (%rax),%ymm7,%ymm7 vpmaxsd (%rax),%ymm14,%ymm9 vpminsd %ymm7,%ymm1,%ymm8 vpmaxsd %ymm7,%ymm1,%ymm1 vpminsd %ymm9,%ymm0,%ymm7 vpmaxsd %ymm9,%ymm0,%ymm0 vpminsd %ymm8,%ymm11,%ymm5 vpminsd %ymm1,%ymm3,%ymm9 vpminsd %ymm7,%ymm10,%ymm12 vpmaxsd %ymm1,%ymm3,%ymm3 vpminsd %ymm0,%ymm2,%ymm4 vpmaxsd %ymm8,%ymm11,%ymm8 vpmaxsd %ymm0,%ymm2,%ymm2 vpmaxsd %ymm7,%ymm10,%ymm7 vpunpckldq %ymm8,%ymm5,%ymm11 vpunpckldq %ymm7,%ymm12,%ymm10 vpunpckhdq %ymm8,%ymm5,%ymm8 vpunpckhdq %ymm7,%ymm12,%ymm7 vpunpckhdq %ymm3,%ymm9,%ymm5 vpunpckldq %ymm2,%ymm4,%ymm1 vpunpckldq %ymm3,%ymm9,%ymm0 vpunpckhdq %ymm2,%ymm4,%ymm4 vpunpcklqdq %ymm0,%ymm11,%ymm3 vpunpckhqdq %ymm0,%ymm11,%ymm9 vpunpcklqdq %ymm5,%ymm8,%ymm2 vpunpcklqdq %ymm4,%ymm7,%ymm11 vpunpckhqdq %ymm5,%ymm8,%ymm5 vpunpcklqdq %ymm1,%ymm10,%ymm12 vpunpckhqdq %ymm4,%ymm7,%ymm0 vpunpckhqdq %ymm1,%ymm10,%ymm1 vperm2i128 $32,%ymm11,%ymm2,%ymm8 vperm2i128 $32,%ymm12,%ymm3,%ymm10 vperm2i128 $32,%ymm1,%ymm9,%ymm7 vperm2i128 $32,%ymm0,%ymm5,%ymm4 vperm2i128 $49,%ymm12,%ymm3,%ymm3 vperm2i128 $49,%ymm11,%ymm2,%ymm2 vperm2i128 $49,%ymm1,%ymm9,%ymm1 vperm2i128 $49,%ymm0,%ymm5,%ymm0 je .L103 vpxor %ymm6,%ymm10,%ymm10 vpxor %ymm6,%ymm8,%ymm8 vpxor %ymm6,%ymm7,%ymm7 vpxor %ymm6,%ymm4,%ymm4 vpxor %ymm6,%ymm3,%ymm3 vpxor %ymm6,%ymm2,%ymm2 vpxor %ymm6,%ymm1,%ymm1 vpxor %ymm6,%ymm0,%ymm0 .L103: add $32,%rdx add $32,%r10 add $32,%r9 add $32,%r8 vmovdqu 
	%ymm10,(%r12,%rcx,4)
	add	$32,%rdi
	add	$8,%rcx
	add	$32,%rsi
	vmovdqu	%ymm3,-32(%rdx)
	add	$32,%rax
	vmovdqu	%ymm8,-32(%r10)
	vmovdqu	%ymm2,-32(%r9)
	vmovdqu	%ymm7,-32(%r8)
	vmovdqu	%ymm1,-32(%rdi)
	vmovdqu	%ymm4,-32(%rsi)
	vmovdqu	%ymm0,-32(%rax)
.L100:
	cmp	-80(%rbp),%rcx
	jl	.L104
.L35:
	add	$264,%rsp
	pop	%rbx
	pop	%r12
	pop	%r13
	pop	%r14
	pop	%r15
	pop	%rbp
	lea	-16(%r13),%rsp
	pop	%r13
	ret
	.endfn	int32_sort_2power
	.rodata.cst32
.LC0:	.quad	-1,0,-1,0
.LC1:	.quad	0,-1,-1,0
.LC2:	.quad	-1,-1,0,0
.LC3:	.quad	-4294967296,4294967295,-4294967296,4294967295
.LC4:	.quad	0x7fffffff7fffffff
	.quad	0x7fffffff7fffffff
	.quad	0x7fffffff7fffffff
	.quad	0x7fffffff7fffffff
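
/	The basic building block used throughout this file, both in the
/	scalar cmp/cmovg sequences and in the vpminsd/vpmaxsd vector
/	sequences, is the branchless compare-exchange step of a sorting
/	network. A minimal C sketch of that primitive follows; the helper
/	name is made up for illustration and nothing below is assembled
/	into this translation unit:
/
/		#include <stdint.h>
/
/		static inline void compare_exchange(int32_t *lo, int32_t *hi)
/		{
/			int32_t a = *lo, b = *hi;
/			*lo = a < b ? a : b;	// smaller value keeps the lower slot
/			*hi = a < b ? b : a;	// larger value moves to the upper slot
/		}
/
/	Because a sorting network applies a fixed, data-independent schedule
/	of these steps, the vector paths above can run eight of them at a
/	time across the lanes of a ymm register.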