# ======================================================================
# x86-64, AT&T/GAS syntax, System V AMD64 ABI.
# Compiler-generated (GCC) benchmark code: many variants of an integer
# vector "combine" (sum) routine plus a registration function.
# NOTE(review): looks like the CS:APP "combining" benchmark suite
# (vec_length / get_vec_start / add_combiner helpers) — confirm against
# the original C sources.
#
# Common shape of every *_combine function below:
#   In:  rdi = vector object pointer (passed to vec_length/get_vec_start)
#        rsi = pointer to a 32-bit destination; the computed sum of the
#              vector's 4-byte elements (addl/paddd) is stored through it.
#   Out: nothing in registers; result written to *rsi.
# Each variant differs only in unrolling factor / number of parallel
# accumulators / SSE2 vs scalar strategy.
# ======================================================================

# ----------------------------------------------------------------------
# register_combiners: register every variant with the test harness.
# Each call is add_combiner(fn=%rdi, base=%rsi, descr=%rdx); combine1 is
# always passed as the reference implementation in %rsi.
# ----------------------------------------------------------------------
register_combiners:
        movl    $combine1, %esi
        subq    $8, %rsp                # align stack to 16 for the calls
        movl    $combine1_descr, %edx
        movq    %rsi, %rdi              # first entry: combine1 registered against itself
        call    add_combiner
        movl    $combine2_descr, %edx
        movl    $combine1, %esi
        movl    $combine2, %edi
        call    add_combiner
        movl    $combine3_descr, %edx
        movl    $combine1, %esi
        movl    $combine3, %edi
        call    add_combiner
        movl    $combine4_descr, %edx
        movl    $combine1, %esi
        movl    $combine4, %edi
        call    add_combiner
        movl    $combine4p_descr, %edx
        movl    $combine1, %esi
        movl    $combine4p, %edi
        call    add_combiner
        movl    $unroll2a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll2a_combine, %edi
        call    add_combiner
        movl    $combine5p_descr, %edx
        movl    $combine1, %esi
        movl    $combine5p, %edi
        call    add_combiner
        movl    $unroll3aw_descr, %edx
        movl    $combine1, %esi
        movl    $unroll3aw_combine, %edi
        call    add_combiner
        movl    $unroll4a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll4a_combine, %edi
        call    add_combiner
        movl    $unroll8a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll8a_combine, %edi
        call    add_combiner
        movl    $unroll16a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll16a_combine, %edi
        call    add_combiner
        movl    $unroll2_descr, %edx
        movl    $combine1, %esi
        movl    $unroll2_combine, %edi
        call    add_combiner
        movl    $unroll3_descr, %edx
        movl    $combine1, %esi
        movl    $unroll3_combine, %edi
        call    add_combiner
        movl    $unroll4_descr, %edx
        movl    $combine1, %esi
        movl    $unroll4_combine, %edi
        call    add_combiner
        movl    $unroll8_descr, %edx
        movl    $combine1, %esi
        movl    $unroll8_combine, %edi
        call    add_combiner
        movl    $unroll16_descr, %edx
        movl    $combine1, %esi
        movl    $unroll16_combine, %edi
        call    add_combiner
        movl    $combine6_descr, %edx
        movl    $combine1, %esi
        movl    $combine6, %edi
        call    add_combiner
        movl    $unroll4x2a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll4x2a_combine, %edi
        call    add_combiner
        movl    $unroll8x2a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll8x2a_combine, %edi
        call    add_combiner
        movl    $unroll3x3a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll3x3a_combine, %edi
        call    add_combiner
        movl    $unroll4x4a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll4x4a_combine, %edi
        call    add_combiner
        movl    $unroll8x4a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll8x4a_combine, %edi
        call    add_combiner
        movl    $unroll6x6a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll6x6a_combine, %edi
        call    add_combiner
        movl    $unroll8x8a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll8x8a_combine, %edi
        call    add_combiner
        movl    $unroll10x10a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll10x10a_combine, %edi
        call    add_combiner
        movl    $unroll12x6a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll12x6a_combine, %edi
        call    add_combiner
        movl    $unroll12x12a_descr, %edx
        movl    $combine1, %esi
        movl    $unroll12x12a_combine, %edi
        call    add_combiner
        movl    $unroll8x2_descr, %edx
        movl    $combine1, %esi
        movl    $unroll8x2_combine, %edi
        call    add_combiner
        movl    $unroll8x4_descr, %edx
        movl    $combine1, %esi
        movl    $unroll8x4_combine, %edi
        call    add_combiner
        movl    $unroll8x8_descr, %edx
        movl    $combine1, %esi
        movl    $unroll8x8_combine, %edi
        call    add_combiner
        movl    $unroll9x3_descr, %edx
        movl    $combine1, %esi
        movl    $unroll9x3_combine, %edi
        call    add_combiner
        movl    $unrollx2as_descr, %edx
        movl    $combine1, %esi
        movl    $unrollx2as_combine, %edi
        call    add_combiner
        movl    $unroll2aa_descr, %edx
        movl    $combine1, %esi
        movl    $unroll2aa_combine, %edi
        call    add_combiner
        movl    $unroll3aa_descr, %edx
        movl    $combine1, %esi
        movl    $unroll3aa_combine, %edi
        call    add_combiner
        movl    $unroll4aa_descr, %edx
        movl    $combine1, %esi
        movl    $unroll4aa_combine, %edi
        call    add_combiner
        movl    $unroll6aa_descr, %edx
        movl    $combine1, %esi
        movl    $unroll6aa_combine, %edi
        call    add_combiner
        movl    $unroll8aa_descr, %edx
        movl    $combine1, %esi
        movl    $unroll8aa_combine, %edi
        call    add_combiner
        movl    $unrollv1_descr, %edx
        movl    $combine1, %esi
        movl    $unrollv1_combine, %edi
        call    add_combiner
        movl    $unrollv2_descr, %edx
        movl    $combine1, %esi
        movl    $unrollv2_combine, %edi
        call    add_combiner
        movl    $unrollv4_descr, %edx
        movl    $combine1, %esi
        movl    $unrollv4_combine, %edi
        call    add_combiner
        movl    $unrollv8_descr, %edx
        movl    $combine1, %esi
        movl    $unrollv8_combine, %edi
        call    add_combiner
        movl    $unrollv12_descr, %edx
        movl    $combine1, %esi
        movl    $unrollv12_combine, %edi
        call    add_combiner
        movl    $unrollv2a_descr, %edx
        movl    $combine1, %esi
        movl    $unrollv2a_combine, %edi
        call    add_combiner
        movl    $unrollv4a_descr, %edx
        movl    $combine1, %esi
        movl    $unrollv4a_combine, %edi
        call    add_combiner
        movl    $unrollv8a_descr, %edx
        movl    $combine1, %esi
        movl    $unrollv8a_combine, %edi
        addq    $8, %rsp                # restore stack, then tail-call the last registration
        jmp     add_combiner

# ----------------------------------------------------------------------
# unrollv8a_combine: SSE2 sum, 8 vectors (32 ints) per iteration, all
# folded into a single xmm accumulator (xmm2).  Scalar loop handles the
# remaining < 32 elements; final horizontal add via a 16-byte stack slot.
# ----------------------------------------------------------------------
unrollv8a_combine:
        pushq   %r12
        movq    %rsi, %r12              # r12 = dest
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx              # rbx = vector object
        subq    $16, %rsp               # 16-byte slot for xmm spill (16-aligned here)
        call    get_vec_start
        movq    %rbx, %rdi
        movq    %rax, %rbp              # rbp = data pointer
        call    vec_length
        movl    $0, (%rsp)              # zero the 4-int accumulator slot
        movl    $0, 4(%rsp)
        cmpl    $31, %eax               # fewer than 32 elements -> skip vector loop
        movl    $0, 8(%rsp)
        movl    $0, 12(%rsp)
        movl    %eax, %esi              # esi = remaining element count
        movdqa  (%rsp), %xmm2           # xmm2 = {0,0,0,0} accumulator
        jle     .L4
        movl    %eax, %edi
        movq    %rbp, %rdx              # rdx = running data pointer
        leal    -32(%rdi), %esi
        movl    %esi, %ecx
        shrl    $5, %ecx                # ecx = (len-32)/32 = full iterations - 1
        mov     %ecx, %eax              # zero-extend to 64 bits
        salq    $7, %rax                # *128 bytes per iteration
        leaq    128(%rbp,%rax), %rax    # rax = loop end pointer
.L5:                                    # 32 ints per pass, one xmm accumulator
        movdqa  (%rdx), %xmm1
        movdqa  64(%rdx), %xmm0
        paddd   16(%rdx), %xmm1
        paddd   32(%rdx), %xmm1
        paddd   48(%rdx), %xmm1
        paddd   80(%rdx), %xmm0
        paddd   96(%rdx), %xmm0
        paddd   112(%rdx), %xmm0
        subq    $-128, %rdx             # += 128 (subq of -128 has a shorter encoding)
        paddd   %xmm0, %xmm1
        paddd   %xmm1, %xmm2
        cmpq    %rax, %rdx
        jne     .L5
        leal    -32(%rdi), %eax         # recompute: esi = leftover count, rbp = leftover ptr
        sall    $5, %ecx
        subl    %ecx, %esi
        shrl    $5, %eax
        mov     %eax, %eax              # zero-extend eax into rax
        salq    $7, %rax
        leaq    128(%rbp,%rax), %rbp
.L4:                                    # scalar cleanup of the tail elements
        xorl    %ecx, %ecx              # ecx = scalar sum
        testl   %esi, %esi
        movq    %rbp, %rdx
        je      .L7
        leal    -1(%rsi), %eax
        leaq    4(%rbp,%rax,4), %rax    # end pointer of tail
.L10:
        addl    (%rdx), %ecx
        addq    $4, %rdx
        cmpq    %rax, %rdx
        jne     .L10
.L7:                                    # horizontal add of xmm2 via the stack slot
        movdqa  %xmm2, (%rsp)
        movl    4(%rsp), %eax
        addl    (%rsp), %eax
        addl    8(%rsp), %eax
        addl    %ecx, %eax
        addl    12(%rsp), %eax
        movl    %eax, (%r12)            # *dest = total
        addq    $16, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret

# ----------------------------------------------------------------------
# unrollv4a_combine: SSE2 sum, 4 vectors (16 ints) per iteration folded
# into one accumulator (xmm1).  Same tail/reduction scheme as above.
# ----------------------------------------------------------------------
unrollv4a_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        subq    $16, %rsp
        call    get_vec_start
        movq    %rbx, %rdi
        movq    %rax, %rbp
        call    vec_length
        movl    $0, (%rsp)
        movl    $0, 4(%rsp)
        cmpl    $15, %eax               # need at least 16 elements for vector loop
        movl    $0, 8(%rsp)
        movl    $0, 12(%rsp)
        movl    %eax, %esi
        movdqa  (%rsp), %xmm1           # accumulator = 0
        jle     .L15
        movl    %eax, %edi
        movq    %rbp, %rdx
        leal    -16(%rdi), %esi
        movl    %esi, %ecx
        shrl    $4, %ecx
        mov     %ecx, %eax
        salq    $6, %rax                # *64 bytes per iteration
        leaq    64(%rbp,%rax), %rax
.L16:
        movdqa  (%rdx), %xmm0
        paddd   16(%rdx), %xmm0
        paddd   32(%rdx), %xmm0
        paddd   48(%rdx), %xmm0
        addq    $64, %rdx
        paddd   %xmm0, %xmm1
        cmpq    %rax, %rdx
        jne     .L16
        leal    -16(%rdi), %eax         # leftover count and pointer
        sall    $4, %ecx
        subl    %ecx, %esi
        shrl    $4, %eax
        mov     %eax, %eax
        salq    $6, %rax
        leaq    64(%rbp,%rax), %rbp
.L15:                                   # scalar tail
        xorl    %ecx, %ecx
        testl   %esi, %esi
        movq    %rbp, %rdx
        je      .L18
        leal    -1(%rsi), %eax
        leaq    4(%rbp,%rax,4), %rax
.L21:
        addl    (%rdx), %ecx
        addq    $4, %rdx
        cmpq    %rax, %rdx
        jne     .L21
.L18:                                   # horizontal reduction
        movdqa  %xmm1, (%rsp)
        movl    4(%rsp), %eax
        addl    (%rsp), %eax
        addl    8(%rsp), %eax
        addl    %ecx, %eax
        addl    12(%rsp), %eax
        movl    %eax, (%r12)
        addq    $16, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret

# ----------------------------------------------------------------------
# unrollv2a_combine: SSE2 sum, 2 vectors (8 ints) per iteration, single
# accumulator (xmm0).
# ----------------------------------------------------------------------
unrollv2a_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        subq    $16, %rsp
        call    get_vec_start
        movq    %rbx, %rdi
        movq    %rax, %rbp
        call    vec_length
        movl    $0, (%rsp)
        movl    $0, 4(%rsp)
        cmpl    $7, %eax                # need at least 8 elements
        movl    $0, 8(%rsp)
        movl    $0, 12(%rsp)
        movl    %eax, %esi
        movdqa  (%rsp), %xmm0
        jle     .L25
        movl    %eax, %edi
        movq    %rbp, %rdx
        leal    -8(%rdi), %esi
        movl    %esi, %ecx
        shrl    $3, %ecx
        mov     %ecx, %eax
        salq    $5, %rax                # *32 bytes per iteration
        leaq    32(%rbp,%rax), %rax
.L26:
        paddd   (%rdx), %xmm0
        paddd   16(%rdx), %xmm0
        addq    $32, %rdx
        cmpq    %rax, %rdx
        jne     .L26
        leal    0(,%rcx,8), %eax        # leftover = esi - 8*iters
        subl    %eax, %esi
        leal    -8(%rdi), %eax
        shrl    $3, %eax
        mov     %eax, %eax
        salq    $5, %rax
        leaq    32(%rbp,%rax), %rbp
.L25:                                   # scalar tail
        xorl    %ecx, %ecx
        testl   %esi, %esi
        movq    %rbp, %rdx
        je      .L28
        leal    -1(%rsi), %eax
        leaq    4(%rbp,%rax,4), %rax
.L31:
        addl    (%rdx), %ecx
        addq    $4, %rdx
        cmpq    %rax, %rdx
        jne     .L31
.L28:                                   # horizontal reduction
        movdqa  %xmm0, (%rsp)
        movl    4(%rsp), %eax
        addl    (%rsp), %eax
        addl    8(%rsp), %eax
        addl    %ecx, %eax
        addl    12(%rsp), %eax
        movl    %eax, (%r12)
        addq    $16, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret

# ----------------------------------------------------------------------
# unrollv12_combine: SSE2 sum with 12 independent xmm accumulators
# (48 ints / 192 bytes per iteration).  Iteration count uses the GCC
# reciprocal-multiply idiom: mull $-1431655765 = unsigned divide by 3
# (combined with shifts, (len-48)/96 style arithmetic).
# ----------------------------------------------------------------------
unrollv12_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        subq    $16, %rsp
        call    get_vec_start
        movq    %rbx, %rdi
        movq    %rax, %rbp
        call    vec_length
        movl    $0, (%rsp)
        movl    $0, 4(%rsp)
        cmpl    $47, %eax               # need at least 48 elements
        movl    $0, 8(%rsp)
        movl    $0, 12(%rsp)
        movl    %eax, %esi
        movdqa  (%rsp), %xmm0
        jle     .L45
        movl    %eax, %edi
        movl    $-1431655765, %eax      # 0xAAAAAAAB: unsigned reciprocal of 3
        movq    %rbp, %r8
        leal    -48(%rdi), %esi
        movdqa  %xmm0, %xmm11           # zero all 12 accumulators
        movdqa  %xmm0, %xmm9
        mull    %esi                    # edx:eax = esi * 0xAAAAAAAB
        movdqa  %xmm0, %xmm10
        movdqa  %xmm0, %xmm7
        movl    %edx, %ecx
        movdqa  %xmm0, %xmm8
        shrl    $5, %ecx                # ecx = (len-48)/96... (compiler's /96 sequence)
        movdqa  %xmm0, %xmm5
        mov     %ecx, %eax
        movdqa  %xmm0, %xmm6
        leaq    3(%rax,%rax,2), %rax    # (3*ecx + 3)
        movdqa  %xmm0, %xmm3
        movdqa  %xmm0, %xmm4
        salq    $6, %rax                # *64 -> total bytes processed
        movdqa  %xmm0, %xmm1
        movdqa  %xmm0, %xmm2
        leaq    (%rbp,%rax), %rax       # loop end pointer
.L37:                                   # 12 parallel accumulators, 192 bytes/pass
        paddd   (%r8), %xmm0
        paddd   16(%r8), %xmm11
        paddd   32(%r8), %xmm9
        paddd   48(%r8), %xmm10
        paddd   64(%r8), %xmm7
        paddd   80(%r8), %xmm8
        paddd   96(%r8), %xmm5
        paddd   112(%r8), %xmm6
        paddd   128(%r8), %xmm3
        paddd   144(%r8), %xmm4
        paddd   160(%r8), %xmm1
        paddd   176(%r8), %xmm2
        addq    $192, %r8
        cmpq    %rax, %r8
        jne     .L37
        leal    (%rcx,%rcx,2), %eax     # recompute leftover count into esi
        leal    -48(%rdi), %edx
        sall    $4, %eax
        subl    %eax, %esi
        movl    $-1431655765, %eax
        mull    %edx
        shrl    $5, %edx
        mov     %edx, %edx              # zero-extend
        leaq    3(%rdx,%rdx,2), %rdx
        salq    $6, %rdx
        addq    %rdx, %rbp              # rbp = start of tail elements
.L36:                                   # scalar tail
        xorl    %ecx, %ecx
        testl   %esi, %esi
        movq    %rbp, %rdx
        je      .L39
        leal    -1(%rsi), %eax
        leaq    4(%rbp,%rax,4), %rax
.L42:
        addl    (%rdx), %ecx
        addq    $4, %rdx
        cmpq    %rax, %rdx
        jne     .L42
.L39:                                   # tree-reduce the 12 accumulators into xmm0
        paddd   %xmm11, %xmm0
        paddd   %xmm10, %xmm9
        paddd   %xmm8, %xmm7
        paddd   %xmm9, %xmm0
        paddd   %xmm6, %xmm5
        paddd   %xmm7, %xmm0
        paddd   %xmm4, %xmm3
        paddd   %xmm5, %xmm0
        paddd   %xmm2, %xmm1
        paddd   %xmm3, %xmm0
        paddd   %xmm1, %xmm0
        movdqa  %xmm0, (%rsp)           # horizontal add via stack slot
        movl    4(%rsp), %eax
        addl    (%rsp), %eax
        addl    8(%rsp), %eax
        addl    %ecx, %eax
        addl    12(%rsp), %eax
        movl    %eax, (%r12)
        addq    $16, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret
.L45:                                   # short-vector path: zero accumulators, skip to tail
        movdqa  %xmm0, %xmm11
        movdqa  %xmm0, %xmm9
        movdqa  %xmm0, %xmm10
        movdqa  %xmm0, %xmm7
        movdqa  %xmm0, %xmm8
        movdqa  %xmm0, %xmm5
        movdqa  %xmm0, %xmm6
        movdqa  %xmm0, %xmm3
        movdqa  %xmm0, %xmm4
        movdqa  %xmm0, %xmm1
        movdqa  %xmm0, %xmm2
        jmp     .L36

# ----------------------------------------------------------------------
# unrollv8_combine: SSE2 sum with 8 independent xmm accumulators
# (32 ints / 128 bytes per iteration).
# ----------------------------------------------------------------------
unrollv8_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        subq    $16, %rsp
        call    get_vec_start
        movq    %rbx, %rdi
        movq    %rax, %rbp
        call    vec_length
        movl    $0, (%rsp)
        movl    $0, 4(%rsp)
        cmpl    $31, %eax               # need at least 32 elements
        movl    $0, 8(%rsp)
        movl    $0, 12(%rsp)
        movl    %eax, %esi
        movdqa  (%rsp), %xmm0
        jle     .L57
        movl    %eax, %edi
        movdqa  %xmm0, %xmm7            # zero the other 7 accumulators
        leal    -32(%rdi), %esi
        movdqa  %xmm0, %xmm5
        movdqa  %xmm0, %xmm6
        movq    %rbp, %rdx
        movl    %esi, %ecx
        movdqa  %xmm0, %xmm3
        shrl    $5, %ecx
        movdqa  %xmm0, %xmm4
        mov     %ecx, %eax
        movdqa  %xmm0, %xmm1
        salq    $7, %rax                # *128 bytes per iteration
        movdqa  %xmm0, %xmm2
        leaq    128(%rbp,%rax), %rax
.L49:
        paddd   (%rdx), %xmm0
        paddd   16(%rdx), %xmm7
        paddd   32(%rdx), %xmm5
        paddd   48(%rdx), %xmm6
        paddd   64(%rdx), %xmm3
        paddd   80(%rdx), %xmm4
        paddd   96(%rdx), %xmm1
        paddd   112(%rdx), %xmm2
        subq    $-128, %rdx             # += 128
        cmpq    %rax, %rdx
        jne     .L49
        leal    -32(%rdi), %eax         # leftover count/pointer
        sall    $5, %ecx
        subl    %ecx, %esi
        shrl    $5, %eax
        mov     %eax, %eax
        salq    $7, %rax
        leaq    128(%rbp,%rax), %rbp
.L48:                                   # scalar tail
        xorl    %ecx, %ecx
        testl   %esi, %esi
        movq    %rbp, %rdx
        je      .L51
        leal    -1(%rsi), %eax
        leaq    4(%rbp,%rax,4), %rax
.L54:
        addl    (%rdx), %ecx
        addq    $4, %rdx
        cmpq    %rax, %rdx
        jne     .L54
.L51:                                   # tree-reduce 8 accumulators, then horizontal add
        paddd   %xmm7, %xmm0
        paddd   %xmm6, %xmm5
        paddd   %xmm4, %xmm3
        paddd   %xmm5, %xmm0
        paddd   %xmm2, %xmm1
        paddd   %xmm3, %xmm0
        paddd   %xmm1, %xmm0
        movdqa  %xmm0, (%rsp)
        movl    4(%rsp), %eax
        addl    (%rsp), %eax
        addl    8(%rsp), %eax
        addl    %ecx, %eax
        addl    12(%rsp), %eax
        movl    %eax, (%r12)
        addq    $16, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret
.L57:                                   # short-vector path
        movdqa  %xmm0, %xmm7
        movdqa  %xmm0, %xmm5
        movdqa  %xmm0, %xmm6
        movdqa  %xmm0, %xmm3
        movdqa  %xmm0, %xmm4
        movdqa  %xmm0, %xmm1
        movdqa  %xmm0, %xmm2
        jmp     .L48

# ----------------------------------------------------------------------
# unrollv4_combine: SSE2 sum with 4 independent xmm accumulators
# (16 ints / 64 bytes per iteration).
# ----------------------------------------------------------------------
unrollv4_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        subq    $16, %rsp
        call    get_vec_start
        movq    %rbx, %rdi
        movq    %rax, %rbp
        call    vec_length
        movl    $0, (%rsp)
        movl    $0, 4(%rsp)
        cmpl    $15, %eax               # need at least 16 elements
        movl    $0, 8(%rsp)
        movl    $0, 12(%rsp)
        movl    %eax, %esi
        movdqa  (%rsp), %xmm0
        jle     .L69
        movl    %eax, %edi
        movdqa  %xmm0, %xmm3
        leal    -16(%rdi), %esi
        movdqa  %xmm0, %xmm2
        movdqa  %xmm0, %xmm1
        movq    %rbp, %rdx
        movl    %esi, %ecx
        shrl    $4, %ecx
        mov     %ecx, %eax
        salq    $6, %rax                # *64 bytes per iteration
        leaq    64(%rbp,%rax), %rax
.L61:
        paddd   (%rdx), %xmm0
        paddd   16(%rdx), %xmm3
        paddd   32(%rdx), %xmm2
        paddd   48(%rdx), %xmm1
        addq    $64, %rdx
        cmpq    %rax, %rdx
        jne     .L61
        leal    -16(%rdi), %eax         # leftover count/pointer
        sall    $4, %ecx
        subl    %ecx, %esi
        shrl    $4, %eax
        mov     %eax, %eax
        salq    $6, %rax
        leaq    64(%rbp,%rax), %rbp
.L60:                                   # scalar tail
        xorl    %ecx, %ecx
        testl   %esi, %esi
        movq    %rbp, %rdx
        je      .L63
        leal    -1(%rsi), %eax
        leaq    4(%rbp,%rax,4), %rax
.L66:
        addl    (%rdx), %ecx
        addq    $4, %rdx
        cmpq    %rax, %rdx
        jne     .L66
.L63:                                   # reduce 4 accumulators, horizontal add
        paddd   %xmm3, %xmm0
        paddd   %xmm1, %xmm2
        paddd   %xmm2, %xmm0
        movdqa  %xmm0, (%rsp)
        movl    4(%rsp), %eax
        addl    (%rsp), %eax
        addl    8(%rsp), %eax
        addl    %ecx, %eax
        addl    12(%rsp), %eax
        movl    %eax, (%r12)
        addq    $16, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret
.L69:                                   # short-vector path
        movdqa  %xmm0, %xmm3
        movdqa  %xmm0, %xmm2
        movdqa  %xmm0, %xmm1
        jmp     .L60

# ----------------------------------------------------------------------
# unrollv2_combine: SSE2 sum with 2 independent xmm accumulators
# (8 ints / 32 bytes per iteration).
# ----------------------------------------------------------------------
unrollv2_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        subq    $16, %rsp
        call    get_vec_start
        movq    %rbx, %rdi
        movq    %rax, %rbp
        call    vec_length
        movl    $0, (%rsp)
        movl    $0, 4(%rsp)
        cmpl    $7, %eax                # need at least 8 elements
        movl    $0, 8(%rsp)
        movl    $0, 12(%rsp)
        movl    %eax, %esi
        movdqa  (%rsp), %xmm0
        movq    %rbp, %rdx
        movdqa  %xmm0, %xmm1
        jle     .L72
        movl    %eax, %edi
        leal    -8(%rdi), %esi
        movl    %esi, %ecx
        shrl    $3, %ecx
        mov     %ecx, %eax
        salq    $5, %rax                # *32 bytes per iteration
        leaq    32(%rbp,%rax), %rax
.L79:
        paddd   (%rdx), %xmm0
        paddd   16(%rdx), %xmm1
        addq    $32, %rdx
        cmpq    %rax, %rdx
        jne     .L79
        leal    0(,%rcx,8), %eax        # leftover count/pointer
        subl    %eax, %esi
        leal    -8(%rdi), %eax
        shrl    $3, %eax
        mov     %eax, %eax
        salq    $5, %rax
        leaq    32(%rbp,%rax), %rbp
.L72:                                   # scalar tail
        xorl    %ecx, %ecx
        testl   %esi, %esi
        movq    %rbp, %rdx
        je      .L75
        leal    -1(%rsi), %eax
        leaq    4(%rbp,%rax,4), %rax
.L78:
        addl    (%rdx), %ecx
        addq    $4, %rdx
        cmpq    %rax, %rdx
        jne     .L78
.L75:                                   # combine the two accumulators, horizontal add
        paddd   %xmm1, %xmm0
        movdqa  %xmm0, (%rsp)
        movl    4(%rsp), %eax
        addl    (%rsp), %eax
        addl    8(%rsp), %eax
        addl    %ecx, %eax
        addl    12(%rsp), %eax
        movl    %eax, (%r12)
        addq    $16, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret

# ----------------------------------------------------------------------
# unrollv1_combine: SSE2 sum, one 4-int vector (16 bytes) per iteration,
# single accumulator.
# ----------------------------------------------------------------------
unrollv1_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        subq    $16, %rsp
        call    get_vec_start
        movq    %rbx, %rdi
        movq    %rax, %rbp
        call    vec_length
        movl    $0, (%rsp)
        movl    $0, 4(%rsp)
        cmpl    $3, %eax                # need at least 4 elements
        movl    $0, 8(%rsp)
        movl    $0, 12(%rsp)
        movl    %eax, %esi
        movdqa  (%rsp), %xmm0
        jle     .L83
        movl    %eax, %edi
        movq    %rbp, %rdx
        leal    -4(%rdi), %esi
        movl    %esi, %ecx
        shrl    $2, %ecx
        mov     %ecx, %eax
        salq    $4, %rax                # *16 bytes per iteration
        leaq    16(%rbp,%rax), %rax
.L84:
        paddd   (%rdx), %xmm0
        addq    $16, %rdx
        cmpq    %rax, %rdx
        jne     .L84
        leal    0(,%rcx,4), %eax        # leftover count/pointer
        subl    %eax, %esi
        leal    -4(%rdi), %eax
        shrl    $2, %eax
        mov     %eax, %eax
        salq    $4, %rax
        leaq    16(%rbp,%rax), %rbp
.L83:                                   # scalar tail
        xorl    %ecx, %ecx
        testl   %esi, %esi
        movq    %rbp, %rdx
        je      .L86
        leal    -1(%rsi), %eax
        leaq    4(%rbp,%rax,4), %rax
.L89:
        addl    (%rdx), %ecx
        addq    $4, %rdx
        cmpq    %rax, %rdx
        jne     .L89
.L86:                                   # horizontal add
        movdqa  %xmm0, (%rsp)
        movl    4(%rsp), %eax
        addl    (%rsp), %eax
        addl    8(%rsp), %eax
        addl    %ecx, %eax
        addl    12(%rsp), %eax
        movl    %eax, (%r12)
        addq    $16, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret

# ----------------------------------------------------------------------
# unroll8aa_combine: scalar sum, unrolled x8, reassociated within each
# iteration (one chain of adds into eax, then folded into edi).
# ----------------------------------------------------------------------
unroll8aa_combine:
        pushq   %r12
        movq    %rsi, %r12              # r12 = dest
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp              # ebp = length
        call    get_vec_start
        xorl    %edi, %edi              # edi = sum
        xorl    %edx, %edx              # rdx = byte offset
        xorl    %esi, %esi              # esi = elements consumed
        cmpl    $7, %ebp
        movq    %rax, %rcx              # rcx = data pointer
        jle     .L94
        leal    -8(%rbp), %r8d
        shrl    $3, %r8d                # r8d = iterations - 1
        mov     %r8d, %eax
        leaq    1(%rax), %rsi
        salq    $5, %rsi                # rsi = iteration count * 32 = end offset
.L99:
        movl    4(%rcx,%rdx), %eax
        addl    (%rcx,%rdx), %eax
        addl    8(%rcx,%rdx), %eax
        addl    12(%rcx,%rdx), %eax
        addl    16(%rcx,%rdx), %eax
        addl    20(%rcx,%rdx), %eax
        addl    24(%rcx,%rdx), %eax
        addl    28(%rcx,%rdx), %eax
        addq    $32, %rdx
        addl    %eax, %edi
        cmpq    %rsi, %rdx
        jne     .L99
        leal    8(,%r8,8), %esi         # esi = first unprocessed index
.L94:                                   # scalar tail by index
        cmpl    %esi, %ebp
        jle     .L96
        movslq  %esi,%rax
        leaq    (%rcx,%rax,4), %rax
.L97:
        addl    $1, %esi
        addl    (%rax), %edi
        addq    $4, %rax
        cmpl    %esi, %ebp
        jg      .L97
.L96:
        popq    %rbx
        popq    %rbp
        movl    %edi, (%r12)            # *dest = sum
        popq    %r12
        ret

# ----------------------------------------------------------------------
# unroll6aa_combine: scalar sum, unrolled x6, reassociated.  Iteration
# count uses the reciprocal-multiply divide-by-3 idiom (0xAAAAAAAB).
# ----------------------------------------------------------------------
unroll6aa_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp
        call    get_vec_start
        xorl    %edi, %edi              # sum
        xorl    %ecx, %ecx              # byte offset
        xorl    %edx, %edx              # first unprocessed index
        cmpl    $5, %ebp
        movq    %rax, %rsi              # data pointer
        jle     .L104
        leal    -6(%rbp), %r8d
        movl    $-1431655765, %eax      # reciprocal of 3
        mull    %r8d
        movl    %edx, %r8d
        shrl    $2, %r8d                # r8d = (len-6)/6
        mov     %r8d, %eax
        leaq    3(%rax,%rax,2), %rax    # 3*(iters+1)
        leaq    0(,%rax,8), %rdx        # *8 -> end byte offset (24 bytes/iter)
.L109:
        movl    4(%rsi,%rcx), %eax
        addl    (%rsi,%rcx), %eax
        addl    8(%rsi,%rcx), %eax
        addl    12(%rsi,%rcx), %eax
        addl    16(%rsi,%rcx), %eax
        addl    20(%rsi,%rcx), %eax
        addq    $24, %rcx
        addl    %eax, %edi
        cmpq    %rdx, %rcx
        jne     .L109
        leal    3(%r8,%r8,2), %eax      # edx = 6*(iters+1) = first unprocessed index
        leal    (%rax,%rax), %edx
.L104:                                  # scalar tail
        cmpl    %edx, %ebp
        jle     .L106
        movslq  %edx,%rax
        leaq    (%rsi,%rax,4), %rax
.L107:
        addl    $1, %edx
        addl    (%rax), %edi
        addq    $4, %rax
        cmpl    %edx, %ebp
        jg      .L107
.L106:
        popq    %rbx
        popq    %rbp
        movl    %edi, (%r12)
        popq    %r12
        ret

# ----------------------------------------------------------------------
# unroll4aa_combine: scalar sum, unrolled x4, reassociated.
# ----------------------------------------------------------------------
unroll4aa_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp
        call    get_vec_start
        xorl    %edi, %edi              # sum
        xorl    %edx, %edx              # byte offset
        xorl    %ecx, %ecx              # first unprocessed index
        cmpl    $3, %ebp
        movq    %rax, %rsi
        jle     .L114
        leal    -4(%rbp), %r8d
        shrl    $2, %r8d
        mov     %r8d, %eax
        leaq    1(%rax), %rcx
        salq    $4, %rcx                # end offset: 16 bytes/iter
.L119:
        movl    4(%rsi,%rdx), %eax
        addl    (%rsi,%rdx), %eax
        addl    8(%rsi,%rdx), %eax
        addl    12(%rsi,%rdx), %eax
        addq    $16, %rdx
        addl    %eax, %edi
        cmpq    %rcx, %rdx
        jne     .L119
        leal    4(,%r8,4), %ecx
.L114:                                  # scalar tail
        cmpl    %ecx, %ebp
        jle     .L116
        movslq  %ecx,%rax
        leaq    (%rsi,%rax,4), %rax
.L117:
        addl    $1, %ecx
        addl    (%rax), %edi
        addq    $4, %rax
        cmpl    %ecx, %ebp
        jg      .L117
.L116:
        popq    %rbx
        popq    %rbp
        movl    %edi, (%r12)
        popq    %r12
        ret

# ----------------------------------------------------------------------
# unroll3aa_combine: scalar sum, unrolled x3, reassociated
# (reciprocal divide-by-3 for the iteration count).
# ----------------------------------------------------------------------
unroll3aa_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp
        call    get_vec_start
        xorl    %esi, %esi              # sum
        xorl    %ecx, %ecx              # byte offset
        xorl    %edx, %edx              # first unprocessed index
        cmpl    $2, %ebp
        movq    %rax, %rdi              # data pointer
        jle     .L124
        leal    -3(%rbp), %r8d
        movl    $-1431655765, %eax      # reciprocal of 3
        mull    %r8d
        movl    %edx, %r8d
        shrl    %r8d                    # r8d = (len-3)/3
        mov     %r8d, %eax
        leaq    3(%rax,%rax,2), %rax
        leaq    0(,%rax,4), %rdx        # end offset: 12 bytes/iter
.L129:
        movl    4(%rdi,%rcx), %eax
        addl    (%rdi,%rcx), %eax
        addl    8(%rdi,%rcx), %eax
        addq    $12, %rcx
        addl    %eax, %esi
        cmpq    %rdx, %rcx
        jne     .L129
        leal    3(%r8,%r8,2), %edx
.L124:                                  # scalar tail
        cmpl    %edx, %ebp
        jle     .L126
        movslq  %edx,%rax
        leaq    (%rdi,%rax,4), %rax
.L127:
        addl    $1, %edx
        addl    (%rax), %esi
        addq    $4, %rax
        cmpl    %edx, %ebp
        jg      .L127
.L126:
        popq    %rbx
        popq    %rbp
        movl    %esi, (%r12)
        popq    %r12
        ret

# ----------------------------------------------------------------------
# unroll2aa_combine: scalar sum, unrolled x2, reassociated.
# ----------------------------------------------------------------------
unroll2aa_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp
        call    get_vec_start
        xorl    %ecx, %ecx              # sum
        xorl    %edx, %edx              # byte offset
        xorl    %esi, %esi              # first unprocessed index
        cmpl    $1, %ebp
        movq    %rax, %rdi
        jle     .L134
        leal    -2(%rbp), %esi
        shrl    %esi
        mov     %esi, %eax
        leaq    8(,%rax,8), %rax        # end offset: 8 bytes/iter
.L139:
        addl    4(%rdi,%rdx), %ecx
        addl    (%rdi,%rdx), %ecx
        addq    $8, %rdx
        cmpq    %rax, %rdx
        jne     .L139
        leal    2(%rsi,%rsi), %esi
.L134:                                  # scalar tail
        cmpl    %esi, %ebp
        jle     .L136
        movslq  %esi,%rax
        leaq    (%rdi,%rax,4), %rax
.L137:
        addl    $1, %esi
        addl    (%rax), %ecx
        addq    $4, %rax
        cmpl    %esi, %ebp
        jg      .L137
.L136:
        popq    %rbx
        popq    %rbp
        movl    %ecx, (%r12)
        popq    %r12
        ret

# ----------------------------------------------------------------------
# unroll8x8_combine: scalar sum, unrolled x8 with 8 independent
# accumulators; pointer-based loop bounded by end-28 byte pointer.
# ----------------------------------------------------------------------
unroll8x8_combine:
        pushq   %r12
        movq    %rdi, %r12
        pushq   %rbp
        movq    %rsi, %rbp              # rbp = dest
        pushq   %rbx
        call    vec_length
        movl    %eax, %ebx
        movq    %r12, %rdi
        movslq  %ebx,%rbx               # sign-extend length to 64 bits
        call    get_vec_start
        leaq    -28(%rax,%rbx,4), %r12  # r12 = last address where 8 ints still fit
        movq    %rax, %rdx
        cmpq    %r12, %rax
        jae     .L152                   # fewer than 8 elements -> zero accs, skip
        xorl    %ebx, %ebx              # 8 accumulators
        xorl    %r11d, %r11d
        xorl    %r10d, %r10d
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %ecx, %ecx
        xorl    %edi, %edi
        xorl    %esi, %esi
.L145:
        addl    (%rax), %ecx
        addl    4(%rax), %edi
        addl    8(%rax), %esi
        addl    12(%rax), %ebx
        addl    16(%rax), %r11d
        addl    20(%rax), %r10d
        addl    24(%rax), %r9d
        addl    28(%rax), %r8d
        addq    $32, %rax
        cmpq    %rax, %r12
        ja      .L145
        movq    %rdx, %rax              # rdx = start of tail (round trip count to x32)
        notq    %rax
        addq    %r12, %rax
        andq    $-32, %rax
        leaq    32(%rdx,%rax), %rdx
.L144:                                  # scalar tail: from rdx up to end (r12+28)
        leaq    28(%r12), %rax
        cmpq    %rdx, %rax
        jbe     .L146
.L149:
        addl    (%rdx), %ecx
        addq    $4, %rdx
        cmpq    %rdx, %rax
        ja      .L149
.L146:                                  # fold the 8 accumulators
        leal    (%r11,%rbx), %eax
        addl    %r10d, %eax
        addl    %r9d, %eax
        addl    %r8d, %eax
        addl    %edi, %eax
        addl    %esi, %eax
        addl    %ecx, %eax
        movl    %eax, (%rbp)
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret
.L152:                                  # short-vector path
        xorl    %ebx, %ebx
        xorl    %r11d, %r11d
        xorl    %r10d, %r10d
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        xorl    %esi, %esi
        xorl    %ecx, %ecx
        jmp     .L144

# ----------------------------------------------------------------------
# unroll8x4_combine: scalar sum, unrolled x8 with 4 accumulators
# (each accumulator takes two elements per iteration).
# ----------------------------------------------------------------------
unroll8x4_combine:
        pushq   %r12
        movq    %rdi, %r12
        pushq   %rbp
        movq    %rsi, %rbp
        pushq   %rbx
        call    vec_length
        movl    %eax, %ebx
        movq    %r12, %rdi
        movslq  %ebx,%rbx
        call    get_vec_start
        leaq    -28(%rax,%rbx,4), %r9   # loop limit pointer
        movq    %rax, %rcx
        cmpq    %r9, %rax
        jae     .L163
        xorl    %edx, %edx              # 4 accumulators
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        xorl    %esi, %esi
.L156:
        addl    16(%rax), %edx
        addl    20(%rax), %r8d
        addl    24(%rax), %edi
        addl    28(%rax), %esi
        addl    (%rax), %edx
        addl    4(%rax), %r8d
        addl    8(%rax), %edi
        addl    12(%rax), %esi
        addq    $32, %rax
        cmpq    %rax, %r9
        ja      .L156
        movq    %rcx, %rax              # compute tail start pointer
        notq    %rax
        addq    %r9, %rax
        andq    $-32, %rax
        leaq    32(%rcx,%rax), %rcx
.L155:                                  # scalar tail
        leaq    28(%r9), %rax
        cmpq    %rcx, %rax
        jbe     .L157
.L160:
        addl    (%rcx), %edx
        addq    $4, %rcx
        cmpq    %rcx, %rax
        ja      .L160
.L157:                                  # fold accumulators
        leal    (%rdi,%r8), %eax
        addl    %esi, %eax
        addl    %edx, %eax
        movl    %eax, (%rbp)
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret
.L163:                                  # short-vector path
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        xorl    %esi, %esi
        xorl    %edx, %edx
        jmp     .L155

# ----------------------------------------------------------------------
# unroll9x3_combine: scalar sum, unrolled x9 with 3 accumulators.
# Tail pointer uses the 64-bit reciprocal-multiply divide-by-9 idiom
# (movabsq constant = ceil(2^67/9) pattern emitted by GCC).
# ----------------------------------------------------------------------
unroll9x3_combine:
        pushq   %r12
        movq    %rdi, %r12
        pushq   %rbp
        movq    %rsi, %rbp
        pushq   %rbx
        call    vec_length
        movl    %eax, %ebx
        movq    %r12, %rdi
        movslq  %ebx,%rbx
        call    get_vec_start
        leaq    -32(%rax,%rbx,4), %r9   # loop limit: 9 ints must still fit
        movq    %rax, %rcx
        cmpq    %r9, %rax
        jae     .L174
        movq    %rax, %rdx
        xorl    %esi, %esi              # 3 accumulators
        xorl    %r8d, %r8d
        xorl    %edi, %edi
.L167:
        movl    12(%rdx), %eax
        addl    (%rdx), %eax
        addl    24(%rdx), %eax
        addl    %eax, %esi
        movl    16(%rdx), %eax
        addl    4(%rdx), %eax
        addl    28(%rdx), %eax
        addl    %eax, %r8d
        movl    20(%rdx), %eax
        addl    8(%rdx), %eax
        addl    32(%rdx), %eax
        addq    $36, %rdx               # 9 ints per iteration
        addl    %eax, %edi
        cmpq    %rdx, %r9
        ja      .L167
        movq    %rcx, %rdx              # tail start = start + 36 * floor(span/36)
        movabsq $-2049638230412172401, %rax
        notq    %rdx
        addq    %r9, %rdx
        mulq    %rdx                    # unsigned divide span by 9 (then *4 below)
        shrq    $5, %rdx
        leaq    9(%rdx,%rdx,8), %rdx
        leaq    (%rcx,%rdx,4), %rcx
.L166:                                  # scalar tail
        leaq    32(%r9), %rax
        cmpq    %rcx, %rax
        jbe     .L168
.L171:
        addl    (%rcx), %esi
        addq    $4, %rcx
        cmpq    %rcx, %rax
        ja      .L171
.L168:                                  # fold accumulators
        leal    (%rdi,%r8), %eax
        addl    %esi, %eax
        movl    %eax, (%rbp)
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret
.L174:                                  # short-vector path
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        xorl    %esi, %esi
        jmp     .L166

# ----------------------------------------------------------------------
# unroll8x2_combine: scalar sum, unrolled x8 with 2 accumulators
# (even/odd element chains).
# ----------------------------------------------------------------------
unroll8x2_combine:
        pushq   %r12
        movq    %rdi, %r12
        pushq   %rbp
        movq    %rsi, %rbp
        pushq   %rbx
        call    vec_length
        movl    %eax, %ebx
        movq    %r12, %rdi
        movslq  %ebx,%rbx
        call    get_vec_start
        leaq    -28(%rax,%rbx,4), %r8   # loop limit pointer
        xorl    %edi, %edi              # 2 accumulators (re-zeroed below)
        xorl    %esi, %esi
        movq    %rax, %rcx
        cmpq    %r8, %rax
        jae     .L177
        movq    %rax, %rdx
        xorl    %esi, %esi
        xorl    %edi, %edi
.L178:
        movl    8(%rdx), %eax
        addl    (%rdx), %eax
        addl    16(%rdx), %eax
        addl    24(%rdx), %eax
        addl    %eax, %esi
        movl    12(%rdx), %eax
        addl    4(%rdx), %eax
        addl    20(%rdx), %eax
        addl    28(%rdx), %eax
        addq    $32, %rdx
        addl    %eax, %edi
        cmpq    %rdx, %r8
        ja      .L178
        movq    %rcx, %rax              # tail start pointer
        notq    %rax
        addq    %r8, %rax
        andq    $-32, %rax
        leaq    32(%rcx,%rax), %rcx
.L177:                                  # scalar tail
        leaq    28(%r8), %rax
        cmpq    %rcx, %rax
        jbe     .L179
.L182:
        addl    (%rcx), %esi
        addq    $4, %rcx
        cmpq    %rcx, %rax
        ja      .L182
.L179:
        leal    (%rsi,%rdi), %eax
        movl    %eax, (%rbp)
        popq    %rbx
        popq    %rbp
        popq    %r12
        ret

# ----------------------------------------------------------------------
# unroll4x2as_combine: split the array into two halves and sum each half
# in its own accumulator (first/second half rather than interleaved);
# odd middle element handled by the index tail loop.
# ----------------------------------------------------------------------
unroll4x2as_combine:
        pushq   %r13
        movq    %rsi, %r13              # r13 = dest
        pushq   %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        subq    $8, %rsp                # realign stack to 16 for calls
        call    vec_length
        movl    %eax, %r12d             # r12d = length
        shrl    $31, %eax               # rounding bias for signed /2
        movq    %rbx, %rdi
        leal    (%rax,%r12), %ebp
        call    get_vec_start
        sarl    %ebp                    # ebp = length / 2 (arithmetic)
        movq    %rax, %rdi              # rdi = first-half pointer
        xorl    %esi, %esi
        movslq  %ebp,%rax
        xorl    %ecx, %ecx
        testl   %ebp, %ebp
        leaq    (%rdi,%rax,4), %rdx     # rdx = second-half pointer
        jle     .L187
        xorl    %esi, %esi              # esi/ecx = half sums
        xorl    %ecx, %ecx
        xorl    %eax, %eax
.L188:
        addl    (%rdi,%rax,4), %esi
        addl    (%rdx,%rax,4), %ecx
        addq    $1, %rax
        cmpl    %eax, %ebp
        jg      .L188
.L187:                                  # tail: indices 2*half .. len-1
        leal    (%rbp,%rbp), %edx
        cmpl    %edx, %r12d
        jle     .L189
        movslq  %edx,%rax
        leaq    (%rdi,%rax,4), %rax
.L190:
        addl    $1, %edx
        addl    (%rax), %ecx
        addq    $4, %rax
        cmpl    %edx, %r12d
        jg      .L190
.L189:
        leal    (%rcx,%rsi), %eax
        movl    %eax, (%r13)
        addq    $8, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        popq    %r13
        ret

# ----------------------------------------------------------------------
# unrollx2as_combine: identical structure to unroll4x2as_combine
# (two half-array accumulators).
# ----------------------------------------------------------------------
unrollx2as_combine:
        pushq   %r13
        movq    %rsi, %r13
        pushq   %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        subq    $8, %rsp
        call    vec_length
        movl    %eax, %r12d
        shrl    $31, %eax
        movq    %rbx, %rdi
        leal    (%rax,%r12), %ebp
        call    get_vec_start
        sarl    %ebp                    # ebp = length / 2
        movq    %rax, %rdi
        xorl    %esi, %esi
        movslq  %ebp,%rax
        xorl    %ecx, %ecx
        testl   %ebp, %ebp
        leaq    (%rdi,%rax,4), %rdx     # second-half pointer
        jle     .L196
        xorl    %esi, %esi
        xorl    %ecx, %ecx
        xorl    %eax, %eax
.L197:
        addl    (%rdi,%rax,4), %esi
        addl    (%rdx,%rax,4), %ecx
        addq    $1, %rax
        cmpl    %eax, %ebp
        jg      .L197
.L196:                                  # tail
        leal    (%rbp,%rbp), %edx
        cmpl    %edx, %r12d
        jle     .L198
        movslq  %edx,%rax
        leaq    (%rdi,%rax,4), %rax
.L199:
        addl    $1, %edx
        addl    (%rax), %ecx
        addq    $4, %rax
        cmpl    %edx, %r12d
        jg      .L199
.L198:
        leal    (%rcx,%rsi), %eax
        movl    %eax, (%r13)
        addq    $8, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        popq    %r13
        ret

# ----------------------------------------------------------------------
# unroll10x10a_combine: scalar sum, unrolled x10 with 10 accumulators
# (uses all callee-saved regs plus spill slots; reciprocal divide-by-5
# constant 0xCCCCCCCD for the iteration count).
# ----------------------------------------------------------------------
unroll10x10a_combine:
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        subq    $24, %rsp
        movq    %rsi, 8(%rsp)           # spill dest
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %r15d             # r15d = length
        call    get_vec_start
        cmpl    $9, %r15d
        movq    %rax, 16(%rsp)          # spill data pointer
        jle     .L212
        leal    -10(%r15), %edx
        movl    $-858993459, %eax       # 0xCCCCCCCD: reciprocal of 5
        movq    16(%rsp), %rcx
        xorl    %edi, %edi              # 10 accumulators
        xorl    %r14d, %r14d
        xorl    %r13d, %r13d
        mull    %edx
        xorl    %r12d, %r12d
        xorl    %ebp, %ebp
        xorl    %ebx, %ebx
        xorl    %r11d, %r11d
        xorl    %r10d, %r10d
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %esi, %esi              # rsi = byte-offset trip counter
        shrl    $3, %edx                # edx = (len-10)/10
        mov     %edx, %eax
        leaq    5(%rax,%rax,4), %rax    # 5*(iters+1)
        salq    $3, %rax                # *8 -> end offset (40 bytes/iter)
.L206:
        addq    $40, %rsi
        addl    (%rcx), %edi
        addl    4(%rcx), %r14d
        addl    8(%rcx), %r13d
        addl    12(%rcx), %r12d
        addl    16(%rcx), %ebp
        addl    20(%rcx), %ebx
        addl    24(%rcx), %r11d
        addl    28(%rcx), %r10d
        addl    32(%rcx), %r9d
        addl    36(%rcx), %r8d
        addq    $40, %rcx
        cmpq    %rax, %rsi
        jne     .L206
        leal    5(%rdx,%rdx,4), %eax    # edx = first unprocessed index
        leal    (%rax,%rax), %edx
.L205:                                  # scalar tail by index
        cmpl    %edx, %r15d
        jle     .L207
        movq    16(%rsp), %rcx
        movslq  %edx,%rax
        leaq    (%rcx,%rax,4), %rax
.L208:
        addl    $1, %edx
        addl    (%rax), %edi
        addq    $4, %rax
        cmpl    %edx, %r15d
        jg      .L208
.L207:                                  # fold the 10 accumulators
        leal    (%r13,%r14), %eax
        movq    8(%rsp), %rdx
        addl    %r12d, %eax
        addl    %ebp, %eax
        addl    %ebx, %eax
        addl    %r11d, %eax
        addl    %r10d, %eax
        addl    %r9d, %eax
        addl    %r8d, %eax
        addl    %edi, %eax
        movl    %eax, (%rdx)
        addq    $24, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        ret
.L212:                                  # short-vector path
        xorl    %r14d, %r14d
        xorl    %r13d, %r13d
        xorl    %r12d, %r12d
        xorl    %ebp, %ebp
        xorl    %ebx, %ebx
        xorl    %r11d, %r11d
        xorl    %r10d, %r10d
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        xorl    %edx, %edx
        jmp     .L205

# ----------------------------------------------------------------------
# unroll8x8a_combine: scalar sum, unrolled x8 with 8 accumulators,
# indexed addressing off a fixed base.
# ----------------------------------------------------------------------
unroll8x8a_combine:
        pushq   %r14
        movq    %rsi, %r14              # r14 = dest
        pushq   %r13
        pushq   %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %r12d             # r12d = length
        call    get_vec_start
        cmpl    $7, %r12d
        movq    %rax, %rcx              # rcx = data pointer
        jle     .L222
        leal    -8(%r12), %r13d
        xorl    %esi, %esi              # 8 accumulators
        xorl    %ebp, %ebp
        xorl    %ebx, %ebx
        xorl    %r11d, %r11d
        xorl    %r10d, %r10d
        shrl    $3, %r13d
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        mov     %r13d, %eax
        xorl    %edi, %edi
        xorl    %edx, %edx              # byte offset
        addq    $1, %rax
        salq    $5, %rax                # end offset: 32 bytes/iter
.L216:
        addl    (%rcx,%rdx), %esi
        addl    4(%rcx,%rdx), %ebp
        addl    8(%rcx,%rdx), %ebx
        addl    12(%rcx,%rdx), %r11d
        addl    16(%rcx,%rdx), %r10d
        addl    20(%rcx,%rdx), %r9d
        addl    24(%rcx,%rdx), %r8d
        addl    28(%rcx,%rdx), %edi
        addq    $32, %rdx
        cmpq    %rax, %rdx
        jne     .L216
        leal    8(,%r13,8), %edx        # first unprocessed index
.L215:                                  # scalar tail by index
        cmpl    %edx, %r12d
        jle     .L217
        movslq  %edx,%rax
        leaq    (%rcx,%rax,4), %rax
.L218:
        addl    $1, %edx
        addl    (%rax), %esi
        addq    $4, %rax
        cmpl    %edx, %r12d
        jg      .L218
.L217:                                  # fold accumulators (pops interleaved by compiler)
        leal    (%rbx,%rbp), %eax
        popq    %rbx
        addl    %r11d, %eax
        addl    %r10d, %eax
        addl    %r9d, %eax
        addl    %r8d, %eax
        popq    %rbp
        addl    %edi, %eax
        popq    %r12
        addl    %esi, %eax
        popq    %r13
        movl    %eax, (%r14)
        popq    %r14
        ret
.L222:                                  # short-vector path
        xorl    %ebp, %ebp
        xorl    %ebx, %ebx
        xorl    %r11d, %r11d
        xorl    %r10d, %r10d
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        xorl    %esi, %esi
        xorl    %edx, %edx
        jmp     .L215

# ----------------------------------------------------------------------
# unroll6x6a_combine: scalar sum, unrolled x6 with 6 accumulators
# (reciprocal divide-by-3 for the iteration count).
# ----------------------------------------------------------------------
unroll6x6a_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp
        call    get_vec_start
        cmpl    $5, %ebp
        movq    %rax, %rsi              # rsi = data pointer
        jle     .L232
        leal    -6(%rbp), %edx
        movl    $-1431655765, %eax      # reciprocal of 3
        xorl    %edi, %edi              # 6 accumulators
        xorl    %ebx, %ebx
        xorl    %r11d, %r11d
        xorl    %r10d, %r10d
        mull    %edx
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %ecx, %ecx              # byte offset
        shrl    $2, %edx                # edx = (len-6)/6
        mov     %edx, %eax
        leaq    3(%rax,%rax,2), %rax
        salq    $3, %rax                # end offset: 24 bytes/iter
.L226:
        addl    (%rsi,%rcx), %edi
        addl    4(%rsi,%rcx), %ebx
        addl    8(%rsi,%rcx), %r11d
        addl    12(%rsi,%rcx), %r10d
        addl    16(%rsi,%rcx), %r9d
        addl    20(%rsi,%rcx), %r8d
        addq    $24, %rcx
        cmpq    %rax, %rcx
        jne     .L226
        leal    3(%rdx,%rdx,2), %eax    # first unprocessed index
        leal    (%rax,%rax), %edx
.L225:                                  # scalar tail
        cmpl    %edx, %ebp
        jle     .L227
        movslq  %edx,%rax
        leaq    (%rsi,%rax,4), %rax
.L228:
        addl    $1, %edx
        addl    (%rax), %edi
        addq    $4, %rax
        cmpl    %edx, %ebp
        jg      .L228
.L227:                                  # fold accumulators
        leal    (%r11,%rbx), %eax
        popq    %rbx
        addl    %r10d, %eax
        addl    %r9d, %eax
        addl    %r8d, %eax
        addl    %edi, %eax
        popq    %rbp
        movl    %eax, (%r12)
        popq    %r12
        ret
.L232:                                  # short-vector path
        xorl    %ebx, %ebx
        xorl    %r11d, %r11d
        xorl    %r10d, %r10d
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %edx, %edx
        xorl    %edi, %edi
        jmp     .L225

# ----------------------------------------------------------------------
# unroll12x12a_combine: scalar sum, unrolled x12 with 12 accumulators —
# more accumulators than free registers, so one lives in a stack slot
# (36(%rsp)) and several values are spilled (reciprocal divide-by-3).
# ----------------------------------------------------------------------
unroll12x12a_combine:
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        subq    $40, %rsp
        movq    %rsi, 8(%rsp)           # spill dest
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, 20(%rsp)          # spill length
        call    get_vec_start
        cmpl    $11, 20(%rsp)
        movq    %rax, 24(%rsp)          # spill data pointer
        jle     .L242
        movl    20(%rsp), %eax
        movq    24(%rsp), %rcx
        xorl    %edi, %edi              # 11 register accumulators...
        movl    $0, 36(%rsp)            # ...plus one in memory
        xorl    %r15d, %r15d
        xorl    %r14d, %r14d
        xorl    %r13d, %r13d
        xorl    %r12d, %r12d
        xorl    %ebp, %ebp
        subl    $12, %eax
        xorl    %ebx, %ebx
        xorl    %r11d, %r11d
        movl    %eax, 4(%rsp)
        movl    $-1431655765, %eax      # reciprocal of 3
        xorl    %r10d, %r10d
        mull    4(%rsp)
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %esi, %esi              # trip counter (byte offset)
        shrl    $3, %edx                # edx = (len-12)/12
        mov     %edx, %eax
        movl    %edx, 4(%rsp)           # spill iteration count
        leaq    3(%rax,%rax,2), %rax
        salq    $4, %rax                # end offset: 48 bytes/iter
.L236:
        addq    $48, %rsi
        addl    (%rcx), %edi
        addl    24(%rcx), %ebp
        movl    4(%rcx), %edx
        addl    28(%rcx), %ebx
        addl    8(%rcx), %r15d
        addl    32(%rcx), %r11d
        addl    12(%rcx), %r14d
        addl    36(%rcx), %r10d
        addl    16(%rcx), %r13d
        addl    40(%rcx), %r9d
        addl    20(%rcx), %r12d
        addl    44(%rcx), %r8d
        addq    $48, %rcx
        addl    %edx, 36(%rsp)          # memory-resident accumulator
        cmpq    %rax, %rsi
        jne     .L236
        movl    4(%rsp), %ecx           # first unprocessed index
        leal    3(%rcx,%rcx,2), %eax
        leal    0(,%rax,4), %edx
.L235:                                  # scalar tail by index
        cmpl    %edx, 20(%rsp)
        jle     .L237
        movq    24(%rsp), %rcx
        movslq  %edx,%rax
        leaq    (%rcx,%rax,4), %rax
.L238:
        addl    (%rax), %edi
        addl    $1, %edx
        addq    $4, %rax
        cmpl    %edx, 20(%rsp)
        jg      .L238
.L237:                                  # fold all 12 accumulators
        movl    36(%rsp), %edx
        movq    8(%rsp), %rcx
        leal    (%r15,%rdx), %eax
        addl    %r14d, %eax
        addl    %r13d, %eax
        addl    %r12d, %eax
        addl    %ebp, %eax
        addl    %ebx, %eax
        addl    %r11d, %eax
        addl    %r10d, %eax
        addl    %r9d, %eax
        addl    %r8d, %eax
        addl    %edi, %eax
        movl    %eax, (%rcx)
        addq    $40, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        ret
.L242:                                  # short-vector path
        movl    $0, 36(%rsp)
        xorl    %r15d, %r15d
        xorl    %r14d, %r14d
        xorl    %r13d, %r13d
        xorl    %r12d, %r12d
        xorl    %ebp, %ebp
        xorl    %ebx, %ebx
        xorl    %r11d, %r11d
        xorl    %r10d, %r10d
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        xorl    %edx, %edx
        jmp     .L235

# ----------------------------------------------------------------------
# unroll12x6a_combine: scalar sum, unrolled x12 with 6 accumulators
# (each accumulator takes two elements per iteration).
# ----------------------------------------------------------------------
unroll12x6a_combine:
        pushq   %r13
        movq    %rsi, %r13              # r13 = dest
        pushq   %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        subq    $8, %rsp                # stack realignment
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp
        call    get_vec_start
        cmpl    $11, %ebp
        movq    %rax, %r12              # r12 = data pointer (kept for tail)
        jle     .L252
        leal    -12(%rbp), %edx
        movq    %rax, %rcx              # rcx = running pointer
        movl    $-1431655765, %eax      # reciprocal of 3
        xorl    %esi, %esi              # 6 accumulators
        xorl    %ebx, %ebx
        xorl    %r11d, %r11d
        mull    %edx
        xorl    %r10d, %r10d
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %edi, %edi              # trip counter
        shrl    $3, %edx                # edx = (len-12)/12
        mov     %edx, %eax
        leaq    3(%rax,%rax,2), %rax
        salq    $4, %rax                # end offset: 48 bytes/iter
.L246:
        addl    24(%rcx), %esi
        addl    28(%rcx), %ebx
        addq    $48, %rdi
        addl    32(%rcx), %r11d
        addl    36(%rcx), %r10d
        addl    40(%rcx), %r9d
        addl    44(%rcx), %r8d
        addl    (%rcx), %esi
        addl    4(%rcx), %ebx
        addl    8(%rcx), %r11d
        addl    12(%rcx), %r10d
        addl    16(%rcx), %r9d
        addl    20(%rcx), %r8d
        addq    $48, %rcx
        cmpq    %rax, %rdi
        jne     .L246
        leal    3(%rdx,%rdx,2), %eax    # first unprocessed index
        leal    0(,%rax,4), %edx
.L245:                                  # scalar tail by index
        cmpl    %edx, %ebp
        jle     .L247
        movslq  %edx,%rax
        leaq    (%r12,%rax,4), %rax
.L248:
        addl    $1, %edx
        addl    (%rax), %esi
        addq    $4, %rax
        cmpl    %edx, %ebp
        jg      .L248
.L247:                                  # fold accumulators
        leal    (%r11,%rbx), %eax
        addl    %r10d, %eax
        addl    %r9d, %eax
        addl    %r8d, %eax
        addl    %esi, %eax
        movl    %eax, (%r13)
        addq    $8, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        popq    %r13
        ret
.L252:                                  # short-vector path
        xorl    %ebx, %ebx
        xorl    %r11d, %r11d
        xorl    %r10d, %r10d
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %esi, %esi
        xorl    %edx, %edx
        jmp     .L245

# ----------------------------------------------------------------------
# unroll8x4a_combine: scalar sum, unrolled x8 with 4 accumulators,
# indexed addressing.
# ----------------------------------------------------------------------
unroll8x4a_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp
        call    get_vec_start
        cmpl    $7, %ebp
        movq    %rax, %rcx
        jle     .L262
        leal    -8(%rbp), %r10d
        xorl    %esi, %esi              # 4 accumulators
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        xorl    %edx, %edx              # byte offset
        shrl    $3, %r10d
        mov     %r10d, %eax
        addq    $1, %rax
        salq    $5, %rax                # end offset: 32 bytes/iter
.L256:
        addl    16(%rcx,%rdx), %esi
        addl    20(%rcx,%rdx), %r9d
        addl    24(%rcx,%rdx), %r8d
        addl    28(%rcx,%rdx), %edi
        addl    (%rcx,%rdx), %esi
        addl    4(%rcx,%rdx), %r9d
        addl    8(%rcx,%rdx), %r8d
        addl    12(%rcx,%rdx), %edi
        addq    $32, %rdx
        cmpq    %rax, %rdx
        jne     .L256
        leal    8(,%r10,8), %edx        # first unprocessed index
.L255:                                  # scalar tail
        cmpl    %edx, %ebp
        jle     .L257
        movslq  %edx,%rax
        leaq    (%rcx,%rax,4), %rax
.L258:
        addl    $1, %edx
        addl    (%rax), %esi
        addq    $4, %rax
        cmpl    %edx, %ebp
        jg      .L258
.L257:                                  # fold accumulators
        leal    (%r8,%r9), %eax
        popq    %rbx
        addl    %edi, %eax
        addl    %esi, %eax
        popq    %rbp
        movl    %eax, (%r12)
        popq    %r12
        ret
.L262:                                  # short-vector path
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        xorl    %esi, %esi
        xorl    %edx, %edx
        jmp     .L255

# ----------------------------------------------------------------------
# unroll4x4a_combine: scalar sum, unrolled x4 with 4 accumulators.
# ----------------------------------------------------------------------
unroll4x4a_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp
        call    get_vec_start
        cmpl    $3, %ebp
        movq    %rax, %rcx
        jle     .L272
        leal    -4(%rbp), %r10d
        xorl    %esi, %esi              # 4 accumulators
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        xorl    %edx, %edx
        shrl    $2, %r10d
        mov     %r10d, %eax
        addq    $1, %rax
        salq    $4, %rax                # end offset: 16 bytes/iter
.L266:
        addl    (%rcx,%rdx), %esi
        addl    4(%rcx,%rdx), %r9d
        addl    8(%rcx,%rdx), %r8d
        addl    12(%rcx,%rdx), %edi
        addq    $16, %rdx
        cmpq    %rax, %rdx
        jne     .L266
        leal    4(,%r10,4), %edx        # first unprocessed index
.L265:                                  # scalar tail
        cmpl    %edx, %ebp
        jle     .L267
        movslq  %edx,%rax
        leaq    (%rcx,%rax,4), %rax
.L268:
        addl    $1, %edx
        addl    (%rax), %esi
        addq    $4, %rax
        cmpl    %edx, %ebp
        jg      .L268
.L267:                                  # fold accumulators
        leal    (%r8,%r9), %eax
        popq    %rbx
        addl    %edi, %eax
        addl    %esi, %eax
        popq    %rbp
        movl    %eax, (%r12)
        popq    %r12
        ret
.L272:                                  # short-vector path
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        xorl    %esi, %esi
        xorl    %edx, %edx
        jmp     .L265

# ----------------------------------------------------------------------
# unroll3x3a_combine: scalar sum, unrolled x3 with 3 accumulators
# (reciprocal divide-by-3).
# ----------------------------------------------------------------------
unroll3x3a_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp
        call    get_vec_start
        cmpl    $2, %ebp
        movq    %rax, %rdi              # data pointer
        jle     .L282
        leal    -3(%rbp), %edx
        movl    $-1431655765, %eax      # reciprocal of 3
        xorl    %esi, %esi              # 3 accumulators
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %ecx, %ecx              # byte offset
        mull    %edx
        shrl    %edx                    # edx = (len-3)/3
        mov     %edx, %eax
        leaq    3(%rax,%rax,2), %rax
        salq    $2, %rax                # end offset: 12 bytes/iter
.L276:
        addl    (%rdi,%rcx), %esi
        addl    4(%rdi,%rcx), %r9d
        addl    8(%rdi,%rcx), %r8d
        addq    $12, %rcx
        cmpq    %rax, %rcx
        jne     .L276
        leal    3(%rdx,%rdx,2), %edx    # first unprocessed index
.L275:                                  # scalar tail
        cmpl    %edx, %ebp
        jle     .L277
        movslq  %edx,%rax
        leaq    (%rdi,%rax,4), %rax
.L278:
        addl    $1, %edx
        addl    (%rax), %esi
        addq    $4, %rax
        cmpl    %edx, %ebp
        jg      .L278
.L277:                                  # fold accumulators
        leal    (%r8,%r9), %eax
        popq    %rbx
        addl    %esi, %eax
        popq    %rbp
        movl    %eax, (%r12)
        popq    %r12
        ret
.L282:                                  # short-vector path
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %esi, %esi
        xorl    %edx, %edx
        jmp     .L275

# ----------------------------------------------------------------------
# unroll8x2a_combine: scalar sum, unrolled x8 with 2 accumulators,
# each fed by a 4-add reassociated chain.
# ----------------------------------------------------------------------
unroll8x2a_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp
        call    get_vec_start
        cmpl    $7, %ebp
        movq    %rax, %rcx
        jle     .L292
        leal    -8(%rbp), %r9d
        xorl    %esi, %esi              # 2 accumulators
        xorl    %r8d, %r8d
        xorl    %edx, %edx              # byte offset
        shrl    $3, %r9d
        mov     %r9d, %eax
        leaq    1(%rax), %rdi
        salq    $5, %rdi                # end offset: 32 bytes/iter
.L286:
        movl    8(%rcx,%rdx), %eax
        addl    (%rcx,%rdx), %eax
        addl    16(%rcx,%rdx), %eax
        addl    24(%rcx,%rdx), %eax
        addl    %eax, %esi
        movl    12(%rcx,%rdx), %eax
        addl    4(%rcx,%rdx), %eax
        addl    20(%rcx,%rdx), %eax
        addl    28(%rcx,%rdx), %eax
        addq    $32, %rdx
        addl    %eax, %r8d
        cmpq    %rdi, %rdx
        jne     .L286
        leal    8(,%r9,8), %edx         # first unprocessed index
.L285:                                  # scalar tail
        cmpl    %edx, %ebp
        jle     .L287
        movslq  %edx,%rax
        leaq    (%rcx,%rax,4), %rax
.L288:
        addl    $1, %edx
        addl    (%rax), %esi
        addq    $4, %rax
        cmpl    %edx, %ebp
        jg      .L288
.L287:
        leal    (%rsi,%r8), %eax
        popq    %rbx
        popq    %rbp
        movl    %eax, (%r12)
        popq    %r12
        ret
.L292:                                  # short-vector path
        xorl    %r8d, %r8d
        xorl    %esi, %esi
        xorl    %edx, %edx
        jmp     .L285

# ----------------------------------------------------------------------
# unroll4x2a_combine: scalar sum, unrolled x4 with 2 accumulators.
# ----------------------------------------------------------------------
unroll4x2a_combine:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp
        call    get_vec_start
        cmpl    $3, %ebp
        movq    %rax, %rsi
        jle     .L302
        leal    -4(%rbp), %r8d
        xorl    %ecx, %ecx              # 2 accumulators
        xorl    %edi, %edi
        xorl    %edx, %edx              # byte offset
        shrl    $2, %r8d
        mov     %r8d, %eax
        addq    $1, %rax
        salq    $4, %rax                # end offset: 16 bytes/iter
.L296:
        addl    8(%rsi,%rdx), %ecx
        addl    12(%rsi,%rdx), %edi
        addl    (%rsi,%rdx), %ecx
        addl    4(%rsi,%rdx), %edi
        addq    $16, %rdx
        cmpq    %rax, %rdx
        jne     .L296
        leal    4(,%r8,4), %edx         # first unprocessed index
.L295:                                  # scalar tail
        cmpl    %edx, %ebp
        jle     .L297
        movslq  %edx,%rax
        leaq    (%rsi,%rax,4), %rax
.L298:
        addl    $1, %edx
        addl    (%rax), %ecx
        addq    $4, %rax
        cmpl    %edx, %ebp
        jg      .L298
.L297:
        leal    (%rcx,%rdi), %eax
        popq    %rbx
        popq    %rbp
        movl    %eax, (%r12)
        popq    %r12
        ret
.L302:                                  # short-vector path
        xorl    %edi, %edi
        xorl    %ecx, %ecx
        xorl    %edx, %edx
        jmp     .L295

# ----------------------------------------------------------------------
# combine6: scalar sum, unrolled x2 with 2 accumulators (even/odd).
# ----------------------------------------------------------------------
combine6:
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbx
        call    vec_length
        movq    %rbx, %rdi
        movl    %eax, %ebp
        call    get_vec_start
        cmpl    $1, %ebp
        movq    %rax, %rsi
        jle     .L312
        leal    -2(%rbp), %r8d
        xorl    %ecx, %ecx              # 2 accumulators
        xorl    %edi, %edi
        xorl    %edx, %edx              # byte offset
        shrl    %r8d
        mov     %r8d, %eax
        leaq    8(,%rax,8), %rax        # end offset: 8 bytes/iter
.L306:
        addl    (%rsi,%rdx), %ecx
        addl    4(%rsi,%rdx), %edi
        addq    $8, %rdx
        cmpq    %rax, %rdx
        jne     .L306
        leal    2(%r8,%r8), %edx        # first unprocessed index
.L305:                                  # scalar tail
        cmpl    %edx, %ebp
        jle     .L307
        movslq  %edx,%rax
        leaq    (%rsi,%rax,4), %rax
.L308:
        addl    $1, %edx
        addl    (%rax), %ecx
        addq    $4, %rax
        cmpl    %edx, %ebp
        jg      .L308
.L307:
        leal    (%rcx,%rdi), %eax
        popq    %rbx
        popq    %rbp
        movl    %eax, (%r12)
        popq    %r12
        ret
.L312:                                  # short-vector path
        xorl    %edi, %edi
        xorl    %ecx, %ecx
        xorl    %edx, %edx
        jmp     .L305

# ----------------------------------------------------------------------
# unroll16_combine: NOTE(review) — this chunk of the file is TRUNCATED
# here: the function body below is cut off mid-instruction ("subl" has
# no operands).  The remainder presumably continues in the next chunk
# of the file; left byte-for-byte as found.  Do not assemble this tail
# in isolation.
# ----------------------------------------------------------------------
unroll16_combine:
        pushq   %r12
        movq    %rdi, %r12
        pushq   %rbp
        movq    %rsi, %rbp
        pushq   %rbx
        call    vec_length
        movq    %r12, %rdi
        movl    %eax, %ebx
        call    get_vec_start
        movl    %ebx, %edx
        movq    %rax, %rcx
        xorl    %esi, %esi
        sarl    $31, %edx
        shrl    $28, %edx               # bias for signed length % 16
        leal    (%rbx,%rdx), %eax
        movslq  %ebx,%rbx
        andl    $15, %eax
        subl
%edx, %eax movq %rcx, %rdx movslq %eax,%r8 subq %r8, %rbx leaq (%rcx,%rbx,4), %rdi cmpq %rdi, %rcx jae .L315 .L321: movl 4(%rdx), %eax addl (%rdx), %eax addl 8(%rdx), %eax addl 12(%rdx), %eax addl 16(%rdx), %eax addl 20(%rdx), %eax addl 24(%rdx), %eax addl 28(%rdx), %eax addl 32(%rdx), %eax addl 36(%rdx), %eax addl 40(%rdx), %eax addl 44(%rdx), %eax addl 48(%rdx), %eax addl 52(%rdx), %eax addl 56(%rdx), %eax addl 60(%rdx), %eax addq $64, %rdx addl %eax, %esi cmpq %rdx, %rdi ja .L321 movq %rcx, %rax notq %rax addq %rdi, %rax andq $-64, %rax leaq 64(%rcx,%rax), %rcx .L315: leaq (%rdi,%r8,4), %rax cmpq %rcx, %rax jbe .L317 .L320: addl (%rcx), %esi addq $4, %rcx cmpq %rcx, %rax ja .L320 .L317: movl %esi, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll8_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movq %r12, %rdi movl %eax, %ebx call get_vec_start movl %ebx, %edx movq %rax, %rcx xorl %esi, %esi sarl $31, %edx shrl $29, %edx leal (%rbx,%rdx), %eax movslq %ebx,%rbx andl $7, %eax subl %edx, %eax movq %rcx, %rdx movslq %eax,%r8 subq %r8, %rbx leaq (%rcx,%rbx,4), %rdi cmpq %rdi, %rcx jae .L326 .L332: movl 4(%rdx), %eax addl (%rdx), %eax addl 8(%rdx), %eax addl 12(%rdx), %eax addl 16(%rdx), %eax addl 20(%rdx), %eax addl 24(%rdx), %eax addl 28(%rdx), %eax addq $32, %rdx addl %eax, %esi cmpq %rdx, %rdi ja .L332 movq %rcx, %rax notq %rax addq %rdi, %rax andq $-32, %rax leaq 32(%rcx,%rax), %rcx .L326: leaq (%rdi,%r8,4), %rax cmpq %rcx, %rax jbe .L328 .L331: addl (%rcx), %esi addq $4, %rcx cmpq %rcx, %rax ja .L331 .L328: movl %esi, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll4_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -12(%rax,%rbx,4), %rdi xorl %esi, %esi movq %rax, %rcx movq %rax, %rdx cmpq %rdi, %rax jae .L337 .L343: movl 4(%rdx), %eax addl (%rdx), %eax addl 8(%rdx), %eax addl 12(%rdx), %eax addq $16, %rdx addl 
%eax, %esi cmpq %rdx, %rdi ja .L343 movq %rcx, %rax notq %rax addq %rdi, %rax andq $-16, %rax leaq 16(%rcx,%rax), %rcx .L337: leaq 12(%rdi), %rax cmpq %rcx, %rax jbe .L339 .L342: addl (%rcx), %esi addq $4, %rcx cmpq %rcx, %rax ja .L342 .L339: movl %esi, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll3_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -8(%rax,%rbx,4), %rdi xorl %esi, %esi movq %rax, %rcx movq %rax, %rdx cmpq %rdi, %rax jae .L348 .L354: movl 4(%rdx), %eax addl (%rdx), %eax addl 8(%rdx), %eax addq $12, %rdx addl %eax, %esi cmpq %rdx, %rdi ja .L354 movq %rcx, %rdx movabsq $-6148914691236517205, %rax notq %rdx addq %rdi, %rdx mulq %rdx shrq $3, %rdx leaq 3(%rdx,%rdx,2), %rdx leaq (%rcx,%rdx,4), %rcx .L348: leaq 8(%rdi), %rax cmpq %rcx, %rax jbe .L350 .L353: addl (%rcx), %esi addq $4, %rcx cmpq %rcx, %rax ja .L353 .L350: movl %esi, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll2_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movq %r12, %rdi movl %eax, %ebx call get_vec_start movl %ebx, %edx movq %rax, %rcx shrl $31, %edx leal (%rbx,%rdx), %eax movslq %ebx,%rbx andl $1, %eax subl %edx, %eax xorl %edx, %edx movslq %eax,%rdi movq %rcx, %rax subq %rdi, %rbx leaq (%rcx,%rbx,4), %rsi cmpq %rsi, %rcx jae .L359 .L365: addl 4(%rax), %edx addl (%rax), %edx addq $8, %rax cmpq %rax, %rsi ja .L365 movq %rcx, %rax notq %rax addq %rsi, %rax shrq $3, %rax leaq 8(%rcx,%rax,8), %rcx .L359: leaq (%rsi,%rdi,4), %rax cmpq %rcx, %rax jbe .L361 .L364: addl (%rcx), %edx addq $4, %rcx cmpq %rcx, %rax ja .L364 .L361: movl %edx, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll16a_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call vec_length movq %rbx, %rdi movl %eax, %ebp call get_vec_start xorl %esi, %esi xorl %edx, %edx cmpl $15, %ebp movq %rax, %r9 jle .L370 leal -16(%rbp), %r8d movq %rax, %rdx 
xorl %esi, %esi xorl %ecx, %ecx shrl $4, %r8d mov %r8d, %eax leaq 1(%rax), %rdi salq $6, %rdi .L371: movl 4(%rdx), %eax addl (%rdx), %eax addq $64, %rcx addl 8(%rdx), %eax addl 12(%rdx), %eax addl 16(%rdx), %eax addl 20(%rdx), %eax addl 24(%rdx), %eax addl 28(%rdx), %eax addl 32(%rdx), %eax addl 36(%rdx), %eax addl 40(%rdx), %eax addl 44(%rdx), %eax addl 48(%rdx), %eax addl 52(%rdx), %eax addl 56(%rdx), %eax addl 60(%rdx), %eax addq $64, %rdx addl %eax, %esi cmpq %rdi, %rcx jne .L371 leal 1(%r8), %edx sall $4, %edx .L370: cmpl %edx, %ebp jle .L372 movslq %edx,%rax leaq (%r9,%rax,4), %rax .L373: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %edx, %ebp jg .L373 .L372: popq %rbx popq %rbp movl %esi, (%r12) popq %r12 ret unroll8a_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call vec_length movq %rbx, %rdi movl %eax, %ebp call get_vec_start xorl %edi, %edi xorl %edx, %edx xorl %esi, %esi cmpl $7, %ebp movq %rax, %rcx jle .L379 leal -8(%rbp), %r8d shrl $3, %r8d mov %r8d, %eax leaq 1(%rax), %rsi salq $5, %rsi .L384: movl 4(%rcx,%rdx), %eax addl (%rcx,%rdx), %eax addl 8(%rcx,%rdx), %eax addl 12(%rcx,%rdx), %eax addl 16(%rcx,%rdx), %eax addl 20(%rcx,%rdx), %eax addl 24(%rcx,%rdx), %eax addl 28(%rcx,%rdx), %eax addq $32, %rdx addl %eax, %edi cmpq %rsi, %rdx jne .L384 leal 8(,%r8,8), %esi .L379: cmpl %esi, %ebp jle .L381 movslq %esi,%rax leaq (%rcx,%rax,4), %rax .L382: addl $1, %esi addl (%rax), %edi addq $4, %rax cmpl %esi, %ebp jg .L382 .L381: popq %rbx popq %rbp movl %edi, (%r12) popq %r12 ret unroll4a_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call vec_length movq %rbx, %rdi movl %eax, %ebp call get_vec_start xorl %edi, %edi xorl %edx, %edx xorl %ecx, %ecx cmpl $3, %ebp movq %rax, %rsi jle .L389 leal -4(%rbp), %r8d shrl $2, %r8d mov %r8d, %eax leaq 1(%rax), %rcx salq $4, %rcx .L394: movl 4(%rsi,%rdx), %eax addl (%rsi,%rdx), %eax addl 8(%rsi,%rdx), %eax addl 12(%rsi,%rdx), %eax addq $16, %rdx addl %eax, %edi 
cmpq %rcx, %rdx jne .L394 leal 4(,%r8,4), %ecx .L389: cmpl %ecx, %ebp jle .L391 movslq %ecx,%rax leaq (%rsi,%rax,4), %rax .L392: addl $1, %ecx addl (%rax), %edi addq $4, %rax cmpl %ecx, %ebp jg .L392 .L391: popq %rbx popq %rbp movl %edi, (%r12) popq %r12 ret unroll3aw_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call vec_length movq %rbx, %rdi movl %eax, %ebp call get_vec_start xorl %esi, %esi xorl %ecx, %ecx xorl %edx, %edx cmpl $1, %ebp movq %rax, %rdi jle .L399 leal -2(%rbp), %r8d movl $-1431655765, %eax mull %r8d movl %edx, %r8d shrl %r8d mov %r8d, %eax leaq 3(%rax,%rax,2), %rax leaq 0(,%rax,4), %rdx .L404: movl 4(%rdi,%rcx), %eax addl (%rdi,%rcx), %eax addl 8(%rdi,%rcx), %eax addq $12, %rcx addl %eax, %esi cmpq %rdx, %rcx jne .L404 leal 3(%r8,%r8,2), %edx .L399: cmpl %edx, %ebp jle .L401 movslq %edx,%rax leaq (%rdi,%rax,4), %rax .L402: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %edx, %ebp jg .L402 .L401: popq %rbx popq %rbp movl %esi, (%r12) popq %r12 ret combine5p: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length cltq xorl %esi, %esi leaq (%rbp,%rax,4), %r8 leaq -8(%r8), %rax cmpq %rax, %rbp jae .L409 movq %r8, %rax movabsq $-6148914691236517205, %rdi leaq 4(%rbp), %rcx subq %rbp, %rax xorl %esi, %esi subq $9, %rax mulq %rdi movq %rdx, %rdi shrq $3, %rdi leaq (%rdi,%rdi,2), %rax leaq 16(%rbp,%rax,4), %rdx .L410: movl (%rcx), %eax addl -4(%rcx), %eax addl 4(%rcx), %eax addq $12, %rcx addl %eax, %esi cmpq %rdx, %rcx jne .L410 leaq 3(%rdi,%rdi,2), %rax leaq (%rbp,%rax,4), %rbp .L409: cmpq %r8, %rbp jae .L411 .L414: addl (%rbp), %esi addq $4, %rbp cmpq %rbp, %r8 ja .L414 .L411: popq %rbx popq %rbp movl %esi, (%r12) popq %r12 ret combine5: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call vec_length movq %rbx, %rdi movl %eax, %ebp call get_vec_start xorl %esi, %esi xorl %ecx, %ecx xorl %edx, %edx cmpl $2, %ebp movq %rax, %rdi 
jle .L419 leal -3(%rbp), %r8d movl $-1431655765, %eax mull %r8d movl %edx, %r8d shrl %r8d mov %r8d, %eax leaq 3(%rax,%rax,2), %rax leaq 0(,%rax,4), %rdx .L424: movl 4(%rdi,%rcx), %eax addl (%rdi,%rcx), %eax addl 8(%rdi,%rcx), %eax addq $12, %rcx addl %eax, %esi cmpq %rdx, %rcx jne .L424 leal 3(%r8,%r8,2), %edx .L419: cmpl %edx, %ebp jle .L421 movslq %edx,%rax leaq (%rdi,%rax,4), %rax .L422: addl $1, %edx addl (%rax), %esi addq $4, %rax cmpl %edx, %ebp jg .L422 .L421: popq %rbx popq %rbp movl %esi, (%r12) popq %r12 ret unroll2a_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call vec_length movq %rbx, %rdi movl %eax, %ebp call get_vec_start xorl %ecx, %ecx xorl %edx, %edx xorl %esi, %esi cmpl $1, %ebp movq %rax, %rdi jle .L429 leal -2(%rbp), %esi shrl %esi mov %esi, %eax leaq 8(,%rax,8), %rax .L434: addl 4(%rdi,%rdx), %ecx addl (%rdi,%rdx), %ecx addq $8, %rdx cmpq %rax, %rdx jne .L434 leal 2(%rsi,%rsi), %esi .L429: cmpl %esi, %ebp jle .L431 movslq %esi,%rax leaq (%rdi,%rax,4), %rax .L432: addl $1, %esi addl (%rax), %ecx addq $4, %rax cmpl %esi, %ebp jg .L432 .L431: popq %rbx popq %rbp movl %ecx, (%r12) popq %r12 ret combine4p: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq (%rax,%rbx,4), %rcx xorl %edx, %edx cmpq %rcx, %rax jae .L439 .L442: addl (%rax), %edx addq $4, %rax cmpq %rax, %rcx ja .L442 .L439: movl %edx, (%rbp) popq %rbx popq %rbp popq %r12 ret combine4: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call vec_length movq %rbx, %rdi movl %eax, %ebp call get_vec_start xorl %ecx, %ecx xorl %edx, %edx testl %ebp, %ebp jle .L446 .L449: addl (%rax,%rdx,4), %ecx addq $1, %rdx cmpl %edx, %ebp jg .L449 .L446: popq %rbx popq %rbp movl %ecx, (%r12) popq %r12 ret combine3: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call vec_length movq %rbx, %rdi movl %eax, %ebp call get_vec_start testl %ebp, %ebp 
movl $0, (%r12) jle .L454 xorl %ecx, %ecx xorl %edx, %edx .L453: addl (%rax,%rdx,4), %ecx addq $1, %rdx cmpl %edx, %ebp movl %ecx, (%r12) jg .L453 .L454: popq %rbx popq %rbp popq %r12 ret combine2: pushq %r14 pushq %r13 movq %rdi, %r13 pushq %r12 pushq %rbp movq %rsi, %rbp pushq %rbx subq $16, %rsp call vec_length testl %eax, %eax movl %eax, %r12d movl $0, (%rbp) jle .L459 leaq 12(%rsp), %r14 xorl %ebx, %ebx .L458: movl %ebx, %esi movq %r14, %rdx movq %r13, %rdi addl $1, %ebx call get_vec_element movl 12(%rsp), %eax addl %eax, (%rbp) cmpl %ebx, %r12d jg .L458 .L459: addq $16, %rsp popq %rbx popq %rbp popq %r12 popq %r13 popq %r14 ret combine1: pushq %r13 pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx xorl %ebx, %ebx subq $24, %rsp movl $0, (%rsi) leaq 20(%rsp), %r13 jmp .L462 .L463: movl %ebx, %esi movq %r13, %rdx movq %r12, %rdi call get_vec_element movl 20(%rsp), %eax addl %eax, (%rbp) addl $1, %ebx .L462: movq %r12, %rdi call vec_length cmpl %eax, %ebx jl .L463 addq $24, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret combine1_descr: combine2_descr: combine3_descr: combine4_descr: combine4p_descr: unroll2a_descr: combine5_descr: combine5p_descr: unroll3aw_descr: unroll4a_descr: unroll8a_descr: unroll16a_descr: unroll2_descr: unroll3_descr: unroll4_descr: unroll8_descr: unroll16_descr: combine6_descr: unroll4x2a_descr: unroll8x2a_descr: unroll3x3a_descr: unroll4x4a_descr: unroll8x4a_descr: unroll12x6a_descr: unroll12x12a_descr: unroll6x6a_descr: unroll8x8a_descr: unroll10x10a_descr: unrollx2as_descr: unroll4x2as_descr: unroll8x2_descr: unroll9x3_descr: unroll8x4_descr: unroll8x8_descr: unroll2aa_descr: unroll3aa_descr: unroll4aa_descr: unroll6aa_descr: unroll8aa_descr: unrollv1_descr: unrollv2_descr: unrollv4_descr: unrollv8_descr: unrollv12_descr: unrollv2a_descr: unrollv4a_descr: unrollv8a_descr: .Lframe1: