unrollv8a_descr: unrollv4a_descr: unrollv2a_descr: unrollv12_descr: unrollv8_descr: unrollv4_descr: unrollv2_descr: unrollv1_descr: unroll8aa_descr: unroll6aa_descr: unroll4aa_descr: unroll3aa_descr: unroll2aa_descr: unroll8x8_descr: unroll8x4_descr: unroll9x3_descr: unroll8x2_descr: unroll4x2as_descr: unrollx2as_descr: unroll10x10a_descr: unroll8x8a_descr: unroll6x6a_descr: unroll12x12a_descr: unroll12x6a_descr: unroll8x4a_descr: unroll4x4a_descr: unroll3x3a_descr: unroll8x2a_descr: unroll4x2a_descr: combine6_descr: unroll16_descr: unroll8_descr: unroll4_descr: unroll3_descr: unroll2_descr: unroll16a_descr: unroll8a_descr: unroll4a_descr: unroll3aw_descr: combine5p_descr: combine5_descr: unroll2a_descr: combine4p_descr: combine4_descr: combine3_descr: combine2_descr: combine1_descr: combine1: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx xorl %ebx, %ebx subq $16, %rsp movl $0, (%rsi) jmp .L2 .L5: leaq 12(%rsp), %rdx movl %ebx, %esi movq %r12, %rdi incl %ebx call get_vec_element movl 12(%rsp), %eax addl %eax, (%rbp) .L2: movq %r12, %rdi call vec_length cmpl %ebx, %eax jg .L5 addq $16, %rsp popq %rbx popq %rbp popq %r12 ret combine2: movq %rbx, -32(%rsp) movq %rbp, -24(%rsp) xorl %ebx, %ebx movq %r12, -16(%rsp) movq %r13, -8(%rsp) subq $40, %rsp movq %rsi, %rbp movq %rdi, %r13 call vec_length movl %eax, %r12d movl $0, (%rbp) cmpl %r12d, %ebx jge .L13 .L15: leaq 4(%rsp), %rdx movl %ebx, %esi movq %r13, %rdi incl %ebx call get_vec_element movl 4(%rsp), %eax addl %eax, (%rbp) cmpl %r12d, %ebx jl .L15 .L13: movq 8(%rsp), %rbx movq 16(%rsp), %rbp movq 24(%rsp), %r12 movq 32(%rsp), %r13 addq $40, %rsp ret combine3: pushq %r12 pushq %rbp movq %rsi, %rbp pushq %rbx movq %rdi, %rbx call vec_length movq %rbx, %rdi movl %eax, %r12d call get_vec_start testl %r12d, %r12d movl $0, (%rbp) jle .L22 movq %rax, %rcx movl %r12d, %edx .L20: movl (%rcx), %eax addq $4, %rcx addl %eax, (%rbp) decl %edx jne .L20 .L22: popq %rbx popq %rbp popq %r12 ret combine4: movq 
%rbx, -24(%rsp) movq %rbp, -16(%rsp) movq %rdi, %rbx movq %r12, -8(%rsp) subq $24, %rsp movq %rsi, %r12 call vec_length movq %rbx, %rdi movl %eax, %ebp call get_vec_start xorl %ecx, %ecx cmpl %ebp, %ecx jge .L29 movq %rax, %rdx movl %ebp, %eax .L27: addl (%rdx), %ecx addq $4, %rdx decl %eax jne .L27 .L29: movl %ecx, (%r12) movq (%rsp), %rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12 addq $24, %rsp ret combine4p: movq %rbx, -24(%rsp) movq %rbp, -16(%rsp) movq %rsi, %rbp movq %r12, -8(%rsp) subq $24, %rsp movq %rdi, %r12 call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq (%rax,%rbx,4), %rcx xorl %edx, %edx jmp .L37 .L38: addl (%rax), %edx addq $4, %rax .L37: cmpq %rcx, %rax jb .L38 movl %edx, (%rbp) movq (%rsp), %rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12 addq $24, %rsp ret unroll2a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -1(%r12), %ebp call get_vec_start xorl %ecx, %ecx xorl %edx, %edx movq %rax, %rsi cmpl %ebp, %ecx jmp .L52 .L53: movslq %edx,%rax addl $2, %edx addl (%rsi,%rax,4), %ecx addl 4(%rsi,%rax,4), %ecx cmpl %ebp, %edx .L52: jl .L53 cmpl %r12d, %edx jge .L51 movslq %edx,%rax subl %edx, %r12d leaq (%rsi,%rax,4), %rax movl %r12d, %edx .L47: addl (%rax), %ecx addq $4, %rax decl %edx jne .L47 .L51: movl %ecx, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret combine5: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -2(%r12), %ebp call get_vec_start xorl %ecx, %ecx xorl %edx, %edx movq %rax, %rsi cmpl %ebp, %ecx jmp .L67 .L68: movslq %edx,%rax addl $3, %edx addl (%rsi,%rax,4), %ecx addl 4(%rsi,%rax,4), %ecx addl 8(%rsi,%rax,4), %ecx cmpl %ebp, %edx .L67: jl .L68 cmpl %r12d, %edx jge .L66 movslq %edx,%rax subl %edx, %r12d leaq (%rsi,%rax,4), %rax movl %r12d, %edx .L62: addl (%rax), %ecx addq $4, %rax decl %edx 
jne .L62 .L66: movl %ecx, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret combine5p: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length cltq leaq (%rbp,%rax,4), %rdx xorl %eax, %eax leaq -8(%rdx), %rcx jmp .L82 .L84: addl (%rbp), %eax addl 4(%rbp), %eax addl 8(%rbp), %eax addq $12, %rbp .L82: cmpq %rcx, %rbp jb .L84 cmpq %rdx, %rbp jae .L81 .L85: addl (%rbp), %eax addq $4, %rbp cmpq %rdx, %rbp jb .L85 .L81: popq %rbx popq %rbp movl %eax, (%r12) popq %r12 ret unroll3aw_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -1(%r12), %ebp call get_vec_start xorl %ecx, %ecx xorl %edx, %edx movq %rax, %rsi cmpl %ebp, %ecx jge .L95 xorl %eax, %eax .L89: addl $3, %edx addl (%rsi,%rax,4), %ecx movslq %edx,%rax addl -8(%rsi,%rax,4), %ecx addl -4(%rsi,%rax,4), %ecx cmpl %ebp, %edx jl .L89 .L95: cmpl %r12d, %edx jge .L97 movslq %edx,%rax subl %edx, %r12d leaq (%rsi,%rax,4), %rax movl %r12d, %edx .L93: addl (%rax), %ecx addq $4, %rax decl %edx jne .L93 .L97: movl %ecx, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll4a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -3(%r12), %ebp call get_vec_start xorl %edx, %edx xorl %esi, %esi movq %rax, %rcx cmpl %ebp, %edx jmp .L111 .L112: movslq %esi,%rax addl $4, %esi addl (%rcx,%rax,4), %edx addl 4(%rcx,%rax,4), %edx addl 8(%rcx,%rax,4), %edx addl 12(%rcx,%rax,4), %edx cmpl %ebp, %esi .L111: jl .L112 cmpl %r12d, %esi jge .L110 movslq %esi,%rax subl %esi, %r12d leaq (%rcx,%rax,4), %rax movl %r12d, %esi .L106: addl (%rax), %edx addq $4, %rax decl %esi jne .L106 .L110: movl %edx, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll8a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq 
%rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -7(%r12), %ebp call get_vec_start xorl %ecx, %ecx xorl %esi, %esi movq %rax, %rdx cmpl %ebp, %ecx jmp .L126 .L127: movslq %esi,%rax addl $8, %esi addl (%rdx,%rax,4), %ecx addl 4(%rdx,%rax,4), %ecx addl 8(%rdx,%rax,4), %ecx addl 12(%rdx,%rax,4), %ecx addl 16(%rdx,%rax,4), %ecx addl 20(%rdx,%rax,4), %ecx addl 24(%rdx,%rax,4), %ecx addl 28(%rdx,%rax,4), %ecx cmpl %ebp, %esi .L126: jl .L127 cmpl %r12d, %esi jge .L125 movslq %esi,%rax subl %esi, %r12d leaq (%rdx,%rax,4), %rax movl %r12d, %esi .L121: addl (%rax), %ecx addq $4, %rax decl %esi jne .L121 .L125: movl %ecx, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll16a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -15(%r12), %ebp call get_vec_start xorl %ecx, %ecx xorl %esi, %esi movq %rax, %rdx cmpl %ebp, %ecx jmp .L141 .L142: movslq %esi,%rax addl $16, %esi addl (%rdx,%rax,4), %ecx addl 4(%rdx,%rax,4), %ecx addl 8(%rdx,%rax,4), %ecx addl 12(%rdx,%rax,4), %ecx addl 16(%rdx,%rax,4), %ecx addl 20(%rdx,%rax,4), %ecx addl 24(%rdx,%rax,4), %ecx addl 28(%rdx,%rax,4), %ecx addl 32(%rdx,%rax,4), %ecx addl 36(%rdx,%rax,4), %ecx addl 40(%rdx,%rax,4), %ecx addl 44(%rdx,%rax,4), %ecx addl 48(%rdx,%rax,4), %ecx addl 52(%rdx,%rax,4), %ecx addl 56(%rdx,%rax,4), %ecx addl 60(%rdx,%rax,4), %ecx cmpl %ebp, %esi .L141: jl .L142 cmpl %r12d, %esi jge .L140 movslq %esi,%rax subl %esi, %r12d leaq (%rdx,%rax,4), %rax movl %r12d, %esi .L136: addl (%rax), %ecx addq $4, %rax decl %esi jne .L136 .L140: movl %ecx, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll2_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movq %r12, %rdi movl %eax, %ebx call get_vec_start movq %rax, %rdx movl %ebx, %eax movl %ebx, %ecx shrl $31, %eax leal (%rbx,%rax), %eax movslq %ebx,%rbx andl 
$-2, %eax subl %eax, %ecx movl %ecx, %eax leaq (%rdx,%rbx,4), %rcx movslq %eax,%rsi leaq 0(,%rsi,4), %rax subq %rax, %rcx xorl %eax, %eax jmp .L154 .L156: addl (%rdx), %eax addl 4(%rdx), %eax addq $8, %rdx .L154: cmpq %rcx, %rdx jb .L156 leaq (%rcx,%rsi,4), %rcx jmp .L155 .L157: addl (%rdx), %eax addq $4, %rdx .L155: cmpq %rcx, %rdx jb .L157 movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll3_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -8(%rax,%rbx,4), %rcx xorl %edx, %edx jmp .L169 .L171: addl (%rax), %edx addl 4(%rax), %edx addl 8(%rax), %edx addq $12, %rax .L169: cmpq %rcx, %rax jb .L171 addq $8, %rcx jmp .L170 .L172: addl (%rax), %edx addq $4, %rax .L170: cmpq %rcx, %rax jb .L172 movl %edx, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll4_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -12(%rax,%rbx,4), %rcx xorl %edx, %edx jmp .L184 .L186: addl (%rax), %edx addl 4(%rax), %edx addl 8(%rax), %edx addl 12(%rax), %edx addq $16, %rax .L184: cmpq %rcx, %rax jb .L186 addq $12, %rcx jmp .L185 .L187: addl (%rax), %edx addq $4, %rax .L185: cmpq %rcx, %rax jb .L187 movl %edx, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll8_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movq %r12, %rdi movl %eax, %ebx call get_vec_start movq %rax, %rdx leal 7(%rbx), %eax cmpl $-1, %ebx movl %ebx, %ecx cmovg %ebx, %eax movslq %ebx,%rbx andl $-8, %eax subl %eax, %ecx movl %ecx, %eax leaq (%rdx,%rbx,4), %rcx movslq %eax,%rsi leaq 0(,%rsi,4), %rax subq %rax, %rcx xorl %eax, %eax jmp .L200 .L202: addl (%rdx), %eax addl 4(%rdx), %eax addl 8(%rdx), %eax addl 12(%rdx), %eax addl 16(%rdx), %eax addl 20(%rdx), %eax addl 24(%rdx), %eax addl 28(%rdx), %eax addq $32, %rdx .L200: cmpq %rcx, %rdx jb .L202 leaq 
(%rcx,%rsi,4), %rcx jmp .L201 .L203: addl (%rdx), %eax addq $4, %rdx .L201: cmpq %rcx, %rdx jb .L203 movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll16_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movq %r12, %rdi movl %eax, %ebx call get_vec_start movq %rax, %rdx leal 15(%rbx), %eax cmpl $-1, %ebx movl %ebx, %ecx cmovg %ebx, %eax movslq %ebx,%rbx andl $-16, %eax subl %eax, %ecx movl %ecx, %eax leaq (%rdx,%rbx,4), %rcx movslq %eax,%rsi leaq 0(,%rsi,4), %rax subq %rax, %rcx xorl %eax, %eax jmp .L216 .L218: addl (%rdx), %eax addl 4(%rdx), %eax addl 8(%rdx), %eax addl 12(%rdx), %eax addl 16(%rdx), %eax addl 20(%rdx), %eax addl 24(%rdx), %eax addl 28(%rdx), %eax addl 32(%rdx), %eax addl 36(%rdx), %eax addl 40(%rdx), %eax addl 44(%rdx), %eax addl 48(%rdx), %eax addl 52(%rdx), %eax addl 56(%rdx), %eax addl 60(%rdx), %eax addq $64, %rdx .L216: cmpq %rcx, %rdx jb .L218 leaq (%rcx,%rsi,4), %rcx jmp .L217 .L219: addl (%rdx), %eax addq $4, %rdx .L217: cmpq %rcx, %rdx jb .L219 movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret combine6: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -1(%r12), %ebp call get_vec_start xorl %esi, %esi xorl %edi, %edi xorl %edx, %edx movq %rax, %rcx cmpl %ebp, %esi jmp .L233 .L234: movslq %edx,%rax addl $2, %edx addl (%rcx,%rax,4), %esi addl 4(%rcx,%rax,4), %edi cmpl %ebp, %edx .L233: jl .L234 cmpl %r12d, %edx jge .L232 movslq %edx,%rax subl %edx, %r12d leaq (%rcx,%rax,4), %rax movl %r12d, %edx .L228: addl (%rax), %esi addq $4, %rax decl %edx jne .L228 .L232: leal (%rsi,%rdi), %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll4x2a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -3(%r12), %ebp call get_vec_start xorl %esi, %esi xorl %edi, %edi xorl %edx, %edx 
movq %rax, %rcx cmpl %ebp, %esi jmp .L248 .L249: movslq %edx,%rax addl $4, %edx addl (%rcx,%rax,4), %esi addl 4(%rcx,%rax,4), %edi addl 8(%rcx,%rax,4), %esi addl 12(%rcx,%rax,4), %edi cmpl %ebp, %edx .L248: jl .L249 cmpl %r12d, %edx jge .L247 movslq %edx,%rax subl %edx, %r12d leaq (%rcx,%rax,4), %rax movl %r12d, %edx .L243: addl (%rax), %esi addq $4, %rax decl %edx jne .L243 .L247: leal (%rsi,%rdi), %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll8x2a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -7(%r12), %ebp call get_vec_start xorl %ecx, %ecx xorl %edi, %edi xorl %esi, %esi movq %rax, %rdx cmpl %ebp, %ecx jmp .L263 .L264: movslq %esi,%rax addl $8, %esi addl (%rdx,%rax,4), %ecx addl 4(%rdx,%rax,4), %edi addl 8(%rdx,%rax,4), %ecx addl 12(%rdx,%rax,4), %edi addl 16(%rdx,%rax,4), %ecx addl 20(%rdx,%rax,4), %edi addl 24(%rdx,%rax,4), %ecx addl 28(%rdx,%rax,4), %edi cmpl %ebp, %esi .L263: jl .L264 cmpl %r12d, %esi jge .L262 movslq %esi,%rax subl %esi, %r12d leaq (%rdx,%rax,4), %rax movl %r12d, %esi .L258: addl (%rax), %ecx addq $4, %rax decl %esi jne .L258 .L262: leal (%rcx,%rdi), %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll3x3a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -2(%r12), %ebp call get_vec_start xorl %edi, %edi xorl %esi, %esi xorl %r8d, %r8d xorl %edx, %edx movq %rax, %rcx cmpl %ebp, %edi jmp .L278 .L279: movslq %edx,%rax addl $3, %edx addl (%rcx,%rax,4), %esi addl 4(%rcx,%rax,4), %r8d addl 8(%rcx,%rax,4), %edi cmpl %ebp, %edx .L278: jl .L279 cmpl %r12d, %edx jge .L277 movslq %edx,%rax subl %edx, %r12d leaq (%rcx,%rax,4), %rax movl %r12d, %edx .L273: addl (%rax), %esi addq $4, %rax decl %edx jne .L273 .L277: leal (%rsi,%r8), %eax addl %edi, %eax movl %eax, (%r13) 
addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll4x4a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -3(%r12), %ebp call get_vec_start xorl %edi, %edi xorl %esi, %esi xorl %r8d, %r8d xorl %r9d, %r9d xorl %edx, %edx movq %rax, %rcx cmpl %ebp, %edi jmp .L293 .L294: movslq %edx,%rax addl $4, %edx addl (%rcx,%rax,4), %esi addl 4(%rcx,%rax,4), %r8d addl 8(%rcx,%rax,4), %r9d addl 12(%rcx,%rax,4), %edi cmpl %ebp, %edx .L293: jl .L294 cmpl %r12d, %edx jge .L292 movslq %edx,%rax subl %edx, %r12d leaq (%rcx,%rax,4), %rax movl %r12d, %edx .L288: addl (%rax), %esi addq $4, %rax decl %edx jne .L288 .L292: leal (%rsi,%r8), %eax addl %r9d, %eax addl %edi, %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll8x4a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -7(%r12), %ebp call get_vec_start xorl %edi, %edi xorl %esi, %esi xorl %r8d, %r8d xorl %r9d, %r9d xorl %ecx, %ecx movq %rax, %rdx cmpl %ebp, %edi jmp .L308 .L309: movslq %ecx,%rax addl $8, %ecx addl (%rdx,%rax,4), %esi addl 4(%rdx,%rax,4), %r8d addl 8(%rdx,%rax,4), %r9d addl 12(%rdx,%rax,4), %edi addl 16(%rdx,%rax,4), %esi addl 20(%rdx,%rax,4), %r8d addl 24(%rdx,%rax,4), %r9d addl 28(%rdx,%rax,4), %edi cmpl %ebp, %ecx .L308: jl .L309 cmpl %r12d, %ecx jge .L307 movslq %ecx,%rax subl %ecx, %r12d leaq (%rdx,%rax,4), %rax movl %r12d, %ecx .L303: addl (%rax), %esi addq $4, %rax decl %ecx jne .L303 .L307: leal (%rsi,%r8), %eax addl %r9d, %eax addl %edi, %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll12x6a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -11(%r12), %ebp call get_vec_start xorl %r8d, %r8d xorl %edi, %edi xorl %r10d, 
%r10d xorl %r11d, %r11d xorl %edx, %edx xorl %r9d, %r9d xorl %esi, %esi movq %rax, %rcx cmpl %ebp, %r8d jmp .L323 .L324: movslq %esi,%rax addl $12, %esi addl (%rcx,%rax,4), %edi addl 4(%rcx,%rax,4), %r10d addl 8(%rcx,%rax,4), %r11d addl 12(%rcx,%rax,4), %edx addl 16(%rcx,%rax,4), %r8d addl 20(%rcx,%rax,4), %r9d addl 24(%rcx,%rax,4), %edi addl 28(%rcx,%rax,4), %r10d addl 32(%rcx,%rax,4), %r11d addl 36(%rcx,%rax,4), %edx addl 40(%rcx,%rax,4), %r8d addl 44(%rcx,%rax,4), %r9d cmpl %ebp, %esi .L323: jl .L324 cmpl %r12d, %esi jge .L322 movslq %esi,%rax subl %esi, %r12d leaq (%rcx,%rax,4), %rax movl %r12d, %esi .L318: addl (%rax), %edi addq $4, %rax decl %esi jne .L318 .L322: leal (%r11,%rdx), %edx leal (%rdi,%r10), %eax addl %edx, %eax leal (%r8,%r9), %edx addl %edx, %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll12x12a_combine: pushq %r15 xorl %r15d, %r15d pushq %r14 xorl %r14d, %r14d pushq %r13 xorl %r13d, %r13d pushq %r12 xorl %r12d, %r12d pushq %rbp xorl %ebp, %ebp pushq %rbx movq %rdi, %rbx subq $24, %rsp movq %rsi, 16(%rsp) call vec_length movq %rbx, %rdi movl %eax, 12(%rsp) subl $11, %eax movl %eax, 8(%rsp) xorl %ebx, %ebx call get_vec_start xorl %edi, %edi xorl %edx, %edx xorl %r9d, %r9d xorl %r10d, %r10d xorl %r11d, %r11d xorl %r8d, %r8d xorl %esi, %esi cmpl 8(%rsp), %r8d movq %rax, %rcx jmp .L338 .L339: movslq %esi,%rax addl $12, %esi addl (%rcx,%rax,4), %edi addl 24(%rcx,%rax,4), %r14d addl 4(%rcx,%rax,4), %ebp addl 28(%rcx,%rax,4), %r10d addl 8(%rcx,%rax,4), %r12d addl 32(%rcx,%rax,4), %r15d addl 12(%rcx,%rax,4), %edx addl 36(%rcx,%rax,4), %r11d addl 16(%rcx,%rax,4), %r13d addl 40(%rcx,%rax,4), %r8d addl 20(%rcx,%rax,4), %r9d addl 44(%rcx,%rax,4), %ebx cmpl 8(%rsp), %esi .L338: jl .L339 cmpl 12(%rsp), %esi jge .L337 movslq %esi,%rax leaq (%rcx,%rax,4), %rax movl 12(%rsp), %ecx subl %esi, %ecx movl %ecx, %esi .L333: addl (%rax), %edi addq $4, %rax decl %esi jne .L333 .L337: leal (%r12,%rdx), %edx leal (%rdi,%rbp), %eax 
addl %edx, %eax leal (%r13,%r9), %edx addl %edx, %eax leal (%r14,%r10), %edx addl %edx, %eax leal (%r15,%r11), %edx addl %edx, %eax leal (%r8,%rbx), %edx addl %edx, %eax movq 16(%rsp), %rdx movl %eax, (%rdx) addq $24, %rsp popq %rbx popq %rbp popq %r12 popq %r13 popq %r14 popq %r15 ret unroll6x6a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -5(%r12), %ebp call get_vec_start xorl %edi, %edi xorl %esi, %esi xorl %r8d, %r8d xorl %r9d, %r9d xorl %r10d, %r10d xorl %r11d, %r11d xorl %ecx, %ecx movq %rax, %rdx cmpl %ebp, %edi jmp .L353 .L354: movslq %ecx,%rax addl $6, %ecx addl (%rdx,%rax,4), %esi addl 4(%rdx,%rax,4), %r8d addl 8(%rdx,%rax,4), %r9d addl 12(%rdx,%rax,4), %r10d addl 16(%rdx,%rax,4), %r11d addl 20(%rdx,%rax,4), %edi cmpl %ebp, %ecx .L353: jl .L354 cmpl %r12d, %ecx jge .L352 movslq %ecx,%rax subl %ecx, %r12d leaq (%rdx,%rax,4), %rax movl %r12d, %ecx .L348: addl (%rax), %esi addq $4, %rax decl %ecx jne .L348 .L352: leal (%rsi,%r8), %eax addl %r9d, %eax addl %r10d, %eax addl %r11d, %eax addl %edi, %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll8x8a_combine: pushq %r14 movq %rsi, %r14 pushq %r13 pushq %r12 pushq %rbp xorl %ebp, %ebp pushq %rbx movq %rdi, %rbx call vec_length movl %eax, %r13d movq %rbx, %rdi xorl %ebx, %ebx leal -7(%r13), %r12d call get_vec_start xorl %edi, %edi xorl %esi, %esi xorl %r8d, %r8d xorl %r9d, %r9d xorl %r10d, %r10d xorl %r11d, %r11d xorl %ecx, %ecx movq %rax, %rdx cmpl %r12d, %edi jmp .L368 .L369: movslq %ecx,%rax addl $8, %ecx addl (%rdx,%rax,4), %esi addl 4(%rdx,%rax,4), %r8d addl 8(%rdx,%rax,4), %r9d addl 12(%rdx,%rax,4), %r10d addl 16(%rdx,%rax,4), %r11d addl 20(%rdx,%rax,4), %ebx addl 24(%rdx,%rax,4), %ebp addl 28(%rdx,%rax,4), %edi cmpl %r12d, %ecx .L368: jl .L369 cmpl %r13d, %ecx jge .L367 movslq %ecx,%rax subl %ecx, %r13d leaq (%rdx,%rax,4), %rax movl %r13d, %ecx .L363: addl 
(%rax), %esi addq $4, %rax decl %ecx jne .L363 .L367: leal (%rsi,%r8), %eax addl %r9d, %eax addl %r10d, %eax addl %r11d, %eax addl %ebx, %eax popq %rbx addl %ebp, %eax popq %rbp popq %r12 addl %edi, %eax popq %r13 movl %eax, (%r14) popq %r14 ret unroll10x10a_combine: pushq %r15 pushq %r14 pushq %r13 xorl %r13d, %r13d pushq %r12 xorl %r12d, %r12d pushq %rbp xorl %ebp, %ebp pushq %rbx movq %rdi, %rbx subq $8, %rsp movq %rsi, (%rsp) call vec_length movl %eax, %r15d movq %rbx, %rdi xorl %ebx, %ebx leal -9(%r15), %r14d call get_vec_start xorl %edi, %edi xorl %esi, %esi xorl %r8d, %r8d xorl %r9d, %r9d xorl %r10d, %r10d xorl %r11d, %r11d xorl %ecx, %ecx movq %rax, %rdx cmpl %r14d, %edi jmp .L383 .L384: movslq %ecx,%rax addl $10, %ecx addl (%rdx,%rax,4), %esi addl 4(%rdx,%rax,4), %r8d addl 8(%rdx,%rax,4), %r9d addl 12(%rdx,%rax,4), %r10d addl 16(%rdx,%rax,4), %r11d addl 20(%rdx,%rax,4), %ebx addl 24(%rdx,%rax,4), %ebp addl 28(%rdx,%rax,4), %r12d addl 32(%rdx,%rax,4), %r13d addl 36(%rdx,%rax,4), %edi cmpl %r14d, %ecx .L383: jl .L384 cmpl %r15d, %ecx jge .L382 movslq %ecx,%rax subl %ecx, %r15d leaq (%rdx,%rax,4), %rax movl %r15d, %ecx .L378: addl (%rax), %esi addq $4, %rax decl %ecx jne .L378 .L382: leal (%rsi,%r8), %eax movq (%rsp), %rdx addl %r9d, %eax addl %r10d, %eax addl %r11d, %eax addl %ebx, %eax addl %ebp, %eax addl %r12d, %eax addl %r13d, %eax addl %edi, %eax movl %eax, (%rdx) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 popq %r14 popq %r15 ret unrollx2as_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d shrl $31, %eax movq %rbx, %rdi leal (%r12,%rax), %ebp call get_vec_start sarl %ebp movq %rax, %rsi xorl %edi, %edi movslq %ebp,%rax xorl %ecx, %ecx xorl %edx, %edx leaq (%rsi,%rax,4), %r8 cmpl %ebp, %edi jmp .L398 .L399: movslq %edx,%rax incl %edx addl (%rsi,%rax,4), %edi addl (%r8,%rax,4), %ecx cmpl %ebp, %edx .L398: jl .L399 leal (%rbp,%rbp), %edx cmpl %r12d, %edx jge .L397 
movslq %edx,%rax subl %edx, %r12d leaq (%rsi,%rax,4), %rax movl %r12d, %edx .L393: addl (%rax), %ecx addq $4, %rax decl %edx jne .L393 .L397: leal (%rdi,%rcx), %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll4x2as_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d shrl $31, %eax movq %rbx, %rdi leal (%r12,%rax), %ebp call get_vec_start sarl %ebp movq %rax, %rsi xorl %edi, %edi movslq %ebp,%rax xorl %ecx, %ecx xorl %edx, %edx leaq (%rsi,%rax,4), %r8 cmpl %ebp, %edi jmp .L413 .L414: movslq %edx,%rax incl %edx addl (%rsi,%rax,4), %edi addl (%r8,%rax,4), %ecx cmpl %ebp, %edx .L413: jl .L414 leal (%rbp,%rbp), %edx cmpl %r12d, %edx jge .L412 movslq %edx,%rax subl %edx, %r12d leaq (%rsi,%rax,4), %rax movl %r12d, %edx .L408: addl (%rax), %ecx addq $4, %rax decl %edx jne .L408 .L412: leal (%rdi,%rcx), %eax movl %eax, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll8x2_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -28(%rax,%rbx,4), %rsi xorl %edx, %edx xorl %ecx, %ecx jmp .L426 .L428: addl (%rax), %edx addl 4(%rax), %ecx addl 8(%rax), %edx addl 12(%rax), %ecx addl 16(%rax), %edx addl 20(%rax), %ecx addl 24(%rax), %edx addl 28(%rax), %ecx addq $32, %rax .L426: cmpq %rsi, %rax jb .L428 addq $28, %rsi jmp .L427 .L429: addl (%rax), %edx addq $4, %rax .L427: cmpq %rsi, %rax jb .L429 leal (%rdx,%rcx), %eax movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll9x3_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -32(%rax,%rbx,4), %rdi xorl %edx, %edx xorl %ecx, %ecx xorl %esi, %esi jmp .L441 .L443: addl (%rax), %edx addl 4(%rax), %ecx addl 8(%rax), %esi addl 12(%rax), %edx addl 16(%rax), %ecx addl 20(%rax), 
%esi addl 24(%rax), %edx addl 28(%rax), %ecx addl 32(%rax), %esi addq $36, %rax .L441: cmpq %rdi, %rax jb .L443 addq $32, %rdi jmp .L442 .L444: addl (%rax), %edx addq $4, %rax .L442: cmpq %rdi, %rax jb .L444 leal (%rdx,%rcx), %eax addl %esi, %eax movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll8x4_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -28(%rax,%rbx,4), %rcx xorl %edx, %edx xorl %esi, %esi xorl %edi, %edi xorl %r8d, %r8d jmp .L456 .L458: addl (%rax), %edx addl 4(%rax), %esi addl 8(%rax), %edi addl 12(%rax), %r8d addl 16(%rax), %edx addl 20(%rax), %esi addl 24(%rax), %edi addl 28(%rax), %r8d addq $32, %rax .L456: cmpq %rcx, %rax jb .L458 addq $28, %rcx jmp .L457 .L459: addl (%rax), %edx addq $4, %rax .L457: cmpq %rcx, %rax jb .L459 leal (%rdx,%rsi), %eax addl %edi, %eax addl %r8d, %eax movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll8x8_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -28(%rax,%rbx,4), %rdx xorl %ecx, %ecx xorl %esi, %esi xorl %edi, %edi xorl %r8d, %r8d xorl %r9d, %r9d xorl %r10d, %r10d xorl %r11d, %r11d xorl %ebx, %ebx jmp .L471 .L473: addl (%rax), %ecx addl 4(%rax), %esi addl 8(%rax), %edi addl 12(%rax), %r8d addl 16(%rax), %r9d addl 20(%rax), %r10d addl 24(%rax), %r11d addl 28(%rax), %ebx addq $32, %rax .L471: cmpq %rdx, %rax jb .L473 addq $28, %rdx jmp .L472 .L474: addl (%rax), %ecx addq $4, %rax .L472: cmpq %rdx, %rax jb .L474 leal (%rcx,%rsi), %eax addl %edi, %eax addl %r8d, %eax addl %r9d, %eax addl %r10d, %eax addl %r11d, %eax addl %ebx, %eax movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll2aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -1(%r12), %ebp call get_vec_start 
xorl %edi, %edi xorl %ecx, %ecx movq %rax, %rsi cmpl %ebp, %edi jmp .L488 .L489: movslq %ecx,%rdx addl $2, %ecx movl 4(%rsi,%rdx,4), %eax addl (%rsi,%rdx,4), %eax addl %eax, %edi cmpl %ebp, %ecx .L488: jl .L489 cmpl %r12d, %ecx jge .L487 movslq %ecx,%rax subl %ecx, %r12d leaq (%rsi,%rax,4), %rax movl %r12d, %ecx .L483: addl (%rax), %edi addq $4, %rax decl %ecx jne .L483 .L487: movl %edi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll3aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -2(%r12), %ebp call get_vec_start xorl %edi, %edi xorl %ecx, %ecx movq %rax, %rsi cmpl %ebp, %edi jmp .L503 .L504: movslq %ecx,%rdx addl $3, %ecx movl 4(%rsi,%rdx,4), %eax addl (%rsi,%rdx,4), %eax addl 8(%rsi,%rdx,4), %eax addl %eax, %edi cmpl %ebp, %ecx .L503: jl .L504 cmpl %r12d, %ecx jge .L502 movslq %ecx,%rax subl %ecx, %r12d leaq (%rsi,%rax,4), %rax movl %r12d, %ecx .L498: addl (%rax), %edi addq $4, %rax decl %ecx jne .L498 .L502: movl %edi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll4aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -3(%r12), %ebp call get_vec_start xorl %r8d, %r8d xorl %esi, %esi movq %rax, %rdi cmpl %ebp, %r8d jmp .L518 .L519: movslq %esi,%rax addl $4, %esi movl 4(%rdi,%rax,4), %ecx movl 12(%rdi,%rax,4), %edx addl (%rdi,%rax,4), %ecx addl 8(%rdi,%rax,4), %edx addl %edx, %ecx addl %ecx, %r8d cmpl %ebp, %esi .L518: jl .L519 cmpl %r12d, %esi jge .L517 movslq %esi,%rax subl %esi, %r12d leaq (%rdi,%rax,4), %rax movl %r12d, %esi .L513: addl (%rax), %r8d addq $4, %rax decl %esi jne .L513 .L517: movl %r8d, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll6aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d 
movq %rbx, %rdi leal -5(%r12), %ebp call get_vec_start xorl %r9d, %r9d xorl %r8d, %r8d movq %rax, %rdi cmpl %ebp, %r9d jmp .L533 .L534: movslq %r8d,%rcx addl $6, %r8d movl 4(%rdi,%rcx,4), %eax movl 12(%rdi,%rcx,4), %esi addl (%rdi,%rcx,4), %eax addl 8(%rdi,%rcx,4), %esi movl 20(%rdi,%rcx,4), %edx addl 16(%rdi,%rcx,4), %edx addl %esi, %eax addl %edx, %eax addl %eax, %r9d cmpl %ebp, %r8d .L533: jl .L534 cmpl %r12d, %r8d jge .L532 movslq %r8d,%rax subl %r8d, %r12d leaq (%rdi,%rax,4), %rax movl %r12d, %r8d .L528: addl (%rax), %r9d addq $4, %rax decl %r8d jne .L528 .L532: movl %r9d, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll8aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -7(%r12), %ebp call get_vec_start xorl %r9d, %r9d xorl %r8d, %r8d movq %rax, %rdi cmpl %ebp, %r9d jmp .L548 .L549: movslq %r8d,%rdx addl $8, %r8d movl 4(%rdi,%rdx,4), %esi movl 12(%rdi,%rdx,4), %eax addl (%rdi,%rdx,4), %esi addl 8(%rdi,%rdx,4), %eax movl 20(%rdi,%rdx,4), %ecx addl 16(%rdi,%rdx,4), %ecx addl %eax, %esi movl 28(%rdi,%rdx,4), %eax addl 24(%rdi,%rdx,4), %eax addl %eax, %ecx addl %ecx, %esi addl %esi, %r9d cmpl %ebp, %r8d .L548: jl .L549 cmpl %r12d, %r8d jge .L547 movslq %r8d,%rax subl %r8d, %r12d leaq (%rdi,%rax,4), %rax movl %r12d, %r8d .L543: addl (%rax), %r9d addq $4, %rax decl %r8d jne .L543 .L547: movl %r9d, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unrollv1_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length xorl %esi, %esi movl %eax, %edx xorl %ecx, %ecx .L554: movslq %ecx,%rax incl %ecx cmpl $3, %ecx movl $0, (%rsp,%rax,4) jle .L554 movdqa (%rsp), %xmm0 jmp .L573 .L575: paddd (%rbp), %xmm0 subl $4, %edx addq $16, %rbp .L573: cmpl $3, %edx jg .L575 testl %edx, %edx jmp .L574 .L576: addl (%rbp), %esi addq $4, %rbp decl %edx 
.L574: jne .L576 movaps %xmm0, (%rsp) xorl %ecx, %ecx .L564: movslq %ecx,%rax incl %ecx addl (%rsp,%rax,4), %esi cmpl $3, %ecx jle .L564 movl %esi, (%r12) addq $16, %rsp popq %rbx popq %rbp popq %r12 ret unrollv2_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length xorl %esi, %esi movl %eax, %edx xorl %ecx, %ecx .L581: movslq %ecx,%rax incl %ecx cmpl $3, %ecx movl $0, (%rsp,%rax,4) jle .L581 movdqa (%rsp), %xmm0 movdqa %xmm0, %xmm1 jmp .L600 .L602: paddd (%rbp), %xmm0 paddd 16(%rbp), %xmm1 subl $8, %edx addq $32, %rbp .L600: cmpl $7, %edx jg .L602 testl %edx, %edx jmp .L601 .L603: addl (%rbp), %esi addq $4, %rbp decl %edx .L601: jne .L603 paddd %xmm1, %xmm0 movaps %xmm0, (%rsp) xorl %ecx, %ecx .L591: movslq %ecx,%rax incl %ecx addl (%rsp,%rax,4), %esi cmpl $3, %ecx jle .L591 movl %esi, (%r12) addq $16, %rsp popq %rbx popq %rbp popq %r12 ret unrollv4_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length xorl %esi, %esi movl %eax, %edx xorl %ecx, %ecx .L608: movslq %ecx,%rax incl %ecx cmpl $3, %ecx movl $0, (%rsp,%rax,4) jle .L608 movdqa (%rsp), %xmm0 movdqa %xmm0, %xmm3 movdqa %xmm0, %xmm1 movdqa %xmm0, %xmm2 jmp .L627 .L629: paddd (%rbp), %xmm0 paddd 16(%rbp), %xmm3 paddd 32(%rbp), %xmm1 paddd 48(%rbp), %xmm2 subl $16, %edx addq $64, %rbp .L627: cmpl $15, %edx jg .L629 testl %edx, %edx jmp .L628 .L630: addl (%rbp), %esi addq $4, %rbp decl %edx .L628: jne .L630 paddd %xmm3, %xmm0 paddd %xmm2, %xmm1 paddd %xmm1, %xmm0 movaps %xmm0, (%rsp) xorl %ecx, %ecx .L618: movslq %ecx,%rax incl %ecx addl (%rsp,%rax,4), %esi cmpl $3, %ecx jle .L618 movl %esi, (%r12) addq $16, %rsp popq %rbx popq %rbp popq %r12 ret unrollv8_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length 
xorl %esi, %esi movl %eax, %ecx xorl %edx, %edx .L635: movslq %edx,%rax incl %edx cmpl $3, %edx movl $0, (%rsp,%rax,4) jle .L635 movdqa (%rsp), %xmm0 movdqa %xmm0, %xmm7 movdqa %xmm0, %xmm1 movdqa %xmm0, %xmm4 movdqa %xmm0, %xmm2 movdqa %xmm0, %xmm5 movdqa %xmm0, %xmm3 movdqa %xmm0, %xmm6 jmp .L654 .L656: paddd (%rbp), %xmm0 paddd 16(%rbp), %xmm7 paddd 32(%rbp), %xmm1 paddd 48(%rbp), %xmm4 paddd 64(%rbp), %xmm2 paddd 80(%rbp), %xmm5 paddd 96(%rbp), %xmm3 paddd 112(%rbp), %xmm6 subl $32, %ecx subq $-128, %rbp .L654: cmpl $31, %ecx jg .L656 testl %ecx, %ecx jmp .L655 .L657: addl (%rbp), %esi addq $4, %rbp decl %ecx .L655: jne .L657 paddd %xmm7, %xmm0 paddd %xmm4, %xmm1 paddd %xmm5, %xmm2 paddd %xmm1, %xmm0 paddd %xmm6, %xmm3 paddd %xmm2, %xmm0 paddd %xmm3, %xmm0 movaps %xmm0, (%rsp) xorl %edx, %edx .L645: movslq %edx,%rax incl %edx addl (%rsp,%rax,4), %esi cmpl $3, %edx jle .L645 movl %esi, (%r12) addq $16, %rsp popq %rbx popq %rbp popq %r12 ret unrollv12_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $16, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length xorl %esi, %esi movl %eax, %ecx xorl %edx, %edx .L662: movslq %edx,%rax incl %edx cmpl $3, %edx movl $0, (%rsp,%rax,4) jle .L662 movdqa (%rsp), %xmm0 movdqa %xmm0, %xmm11 movdqa %xmm0, %xmm1 movdqa %xmm0, %xmm6 movdqa %xmm0, %xmm2 movdqa %xmm0, %xmm7 movdqa %xmm0, %xmm3 movdqa %xmm0, %xmm8 movdqa %xmm0, %xmm4 movdqa %xmm0, %xmm9 movdqa %xmm0, %xmm5 movdqa %xmm0, %xmm10 jmp .L681 .L683: paddd (%rbp), %xmm0 paddd 16(%rbp), %xmm11 paddd 32(%rbp), %xmm1 paddd 48(%rbp), %xmm6 paddd 64(%rbp), %xmm2 paddd 80(%rbp), %xmm7 paddd 96(%rbp), %xmm3 paddd 112(%rbp), %xmm8 paddd 128(%rbp), %xmm4 paddd 144(%rbp), %xmm9 paddd 160(%rbp), %xmm5 paddd 176(%rbp), %xmm10 subl $48, %ecx addq $192, %rbp .L681: cmpl $47, %ecx jg .L683 testl %ecx, %ecx jmp .L682 .L684: addl (%rbp), %esi addq $4, %rbp decl %ecx .L682: jne .L684 paddd %xmm11, %xmm0 paddd %xmm6, %xmm1 paddd %xmm7, %xmm2 
# --- continuation of unrollv12_combine: finish merging the 12 packed
# accumulators into xmm0, then reduce and store.
	paddd	%xmm1, %xmm0
	paddd	%xmm8, %xmm3
	paddd	%xmm2, %xmm0
	paddd	%xmm9, %xmm4
	paddd	%xmm3, %xmm0
	paddd	%xmm10, %xmm5
	paddd	%xmm4, %xmm0
	paddd	%xmm5, %xmm0
	movaps	%xmm0, (%rsp)		# spill and reduce the 4 lanes into esi
	xorl	%edx, %edx
.L672:
	movslq	%edx,%rax
	incl	%edx
	addl	(%rsp,%rax,4), %esi
	cmpl	$3, %edx
	jle	.L672
	movl	%esi, (%r12)		# *dest = total
	addq	$16, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# Like unrollv2_combine but "a" variant: pairs of loads are added to each
# other first, then folded into a single accumulator (xmm1).
unrollv2a_combine:
	pushq	%r12
	movq	%rsi, %r12		# r12 = dest pointer
	pushq	%rbp
	pushq	%rbx
	movq	%rdi, %rbx		# rbx = vector handle
	subq	$16, %rsp
	call	get_vec_start
	movq	%rbx, %rdi
	movq	%rax, %rbp		# rbp = data pointer
	call	vec_length
	xorl	%esi, %esi		# esi = scalar sum
	movl	%eax, %edx		# edx = elements remaining
	xorl	%ecx, %ecx
.L689:					# zero the 4-int stack buffer
	movslq	%ecx,%rax
	incl	%ecx
	cmpl	$3, %ecx
	movl	$0, (%rsp,%rax,4)
	jle	.L689
	movdqa	(%rsp), %xmm1		# single accumulator = {0,0,0,0}
	jmp	.L708
.L710:					# main loop: 8 ints/iter, associativity-
	movdqa	(%rbp), %xmm0		# reassociated: (a+b) then += accumulator
	subl	$8, %edx
	paddd	16(%rbp), %xmm0
	addq	$32, %rbp
	paddd	%xmm0, %xmm1
.L708:
	cmpl	$7, %edx
	jg	.L710
	testl	%edx, %edx
	jmp	.L709
.L711:					# scalar cleanup of leftover 0..7 ints
	addl	(%rbp), %esi
	addq	$4, %rbp
	decl	%edx
.L709:
	jne	.L711
	movaps	%xmm1, (%rsp)		# spill and reduce the 4 lanes
	xorl	%ecx, %ecx
.L699:
	movslq	%ecx,%rax
	incl	%ecx
	addl	(%rsp,%rax,4), %esi
	cmpl	$3, %ecx
	jle	.L699
	movl	%esi, (%r12)		# *dest = total
	addq	$16, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# "a" variant with a 4-wide reassociated add tree, one accumulator (xmm2);
# 16 ints per main-loop iteration.
unrollv4a_combine:
	pushq	%r12
	movq	%rsi, %r12		# r12 = dest pointer
	pushq	%rbp
	pushq	%rbx
	movq	%rdi, %rbx		# rbx = vector handle
	subq	$16, %rsp
	call	get_vec_start
	movq	%rbx, %rdi
	movq	%rax, %rbp		# rbp = data pointer
	call	vec_length
	xorl	%esi, %esi		# esi = scalar sum
	movl	%eax, %edx		# edx = elements remaining
	xorl	%ecx, %ecx
.L716:					# zero the 4-int stack buffer
	movslq	%ecx,%rax
	incl	%ecx
	cmpl	$3, %ecx
	movl	$0, (%rsp,%rax,4)
	jle	.L716
	movdqa	(%rsp), %xmm2		# single accumulator = {0,0,0,0}
	jmp	.L735
.L737:					# main loop: ((a+b)+(c+d)) += accumulator
	movdqa	(%rbp), %xmm1
	subl	$16, %edx
	movdqa	32(%rbp), %xmm0
	paddd	16(%rbp), %xmm1
	paddd	48(%rbp), %xmm0
	addq	$64, %rbp
	paddd	%xmm0, %xmm1
	paddd	%xmm1, %xmm2
.L735:
	cmpl	$15, %edx
	jg	.L737
	testl	%edx, %edx
	jmp	.L736
.L738:					# scalar cleanup of leftover 0..15 ints
	addl	(%rbp), %esi
	addq	$4, %rbp
	decl	%edx
.L736:
	jne	.L738
	movaps	%xmm2, (%rsp)		# spill and reduce the 4 lanes
	xorl	%ecx, %ecx
.L726:
	movslq	%ecx,%rax
	incl	%ecx
	addl	(%rsp,%rax,4), %esi
	cmpl	$3, %ecx
	jle	.L726
	movl	%esi, (%r12)		# *dest = total
	addq	$16, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# "a" variant with an 8-wide reassociated add tree, one accumulator (xmm4);
# 32 ints per main-loop iteration.
unrollv8a_combine:
	pushq	%r12
	movq	%rsi, %r12		# r12 = dest pointer
	pushq	%rbp
	pushq	%rbx
	movq	%rdi, %rbx		# rbx = vector handle
	subq	$16, %rsp
	call	get_vec_start
	movq	%rbx, %rdi
	movq	%rax, %rbp		# rbp = data pointer
	call	vec_length
	xorl	%esi, %esi		# esi = scalar sum
	movl	%eax, %ecx		# ecx = elements remaining
	xorl	%edx, %edx
.L743:					# zero the 4-int stack buffer
	movslq	%edx,%rax
	incl	%edx
	cmpl	$3, %edx
	movl	$0, (%rsp,%rax,4)
	jle	.L743
	movdqa	(%rsp), %xmm4		# single accumulator = {0,0,0,0}
	jmp	.L762
.L764:					# main loop: balanced 8-wide add tree
	movdqa	(%rbp), %xmm0
	subl	$32, %ecx
	movdqa	32(%rbp), %xmm1
	paddd	16(%rbp), %xmm0
	movdqa	64(%rbp), %xmm2
	paddd	48(%rbp), %xmm1
	paddd	%xmm1, %xmm0
	movdqa	96(%rbp), %xmm3
	paddd	80(%rbp), %xmm2
	paddd	112(%rbp), %xmm3
	subq	$-128, %rbp		# rbp += 128 (imm8 encoding trick)
	paddd	%xmm3, %xmm2
	paddd	%xmm2, %xmm0
	paddd	%xmm0, %xmm4
.L762:
	cmpl	$31, %ecx
	jg	.L764			# while at least 32 elements remain
	testl	%ecx, %ecx
	jmp	.L763
.L765:					# scalar cleanup of leftover 0..31 ints
	addl	(%rbp), %esi
	addq	$4, %rbp
	decl	%ecx
.L763:
	jne	.L765
	movaps	%xmm4, (%rsp)		# spill and reduce the 4 lanes
	xorl	%edx, %edx
.L753:
	movslq	%edx,%rax
	incl	%edx
	addl	(%rsp,%rax,4), %esi
	cmpl	$3, %edx
	jle	.L753
	movl	%esi, (%r12)		# *dest = total
	addq	$16, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	ret

# Registration table: one add_combiner(fn, ref, descr) call per variant.
# %esi is $combine1 in every call -- presumably the reference
# implementation the harness checks against (TODO confirm).
# NOTE(review): the 32-bit absolute `movl $sym, %reg` forms are
# non-PIE-friendly; fine only for a non-PIC executable build.
register_combiners:
	subq	$8, %rsp		# 8 + return address = 16-byte call alignment
	movl	$combine1_descr, %edx
	movl	$combine1, %esi
	movl	$combine1, %edi
	call	add_combiner
	movl	$combine2_descr, %edx
	movl	$combine1, %esi
	movl	$combine2, %edi
	call	add_combiner
	movl	$combine3_descr, %edx
	movl	$combine1, %esi
	movl	$combine3, %edi
	call	add_combiner
	movl	$combine4_descr, %edx
	movl	$combine1, %esi
	movl	$combine4, %edi
	call	add_combiner
	movl	$combine4p_descr, %edx
	movl	$combine1, %esi
	movl	$combine4p, %edi
	call	add_combiner
	movl	$unroll2a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll2a_combine, %edi
	call	add_combiner
	movl	$combine5p_descr, %edx
	movl	$combine1, %esi
	movl	$combine5p, %edi
	call	add_combiner
	movl	$unroll3aw_descr, %edx
	movl	$combine1, %esi
	movl	$unroll3aw_combine, %edi
	call	add_combiner
	movl	$unroll4a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll4a_combine, %edi
	call	add_combiner
	movl	$unroll8a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll8a_combine, %edi
	call	add_combiner
	movl	$unroll16a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll16a_combine, %edi
	call	add_combiner
	movl	$unroll2_descr, %edx
	movl	$combine1, %esi
	movl	$unroll2_combine, %edi
	call	add_combiner
	movl	$unroll3_descr, %edx
	movl	$combine1, %esi
	movl	$unroll3_combine, %edi
	call	add_combiner
	movl	$unroll4_descr, %edx
	movl	$combine1, %esi
	movl	$unroll4_combine, %edi
	call	add_combiner
	movl	$unroll8_descr, %edx
	movl	$combine1, %esi
	movl	$unroll8_combine, %edi
	call	add_combiner
	movl	$unroll16_descr, %edx
	movl	$combine1, %esi
	movl	$unroll16_combine, %edi
	call	add_combiner
	movl	$combine6_descr, %edx
	movl	$combine1, %esi
	movl	$combine6, %edi
	call	add_combiner
	movl	$unroll4x2a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll4x2a_combine, %edi
	call	add_combiner
	movl	$unroll8x2a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll8x2a_combine, %edi
	call	add_combiner
	movl	$unroll3x3a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll3x3a_combine, %edi
	call	add_combiner
	movl	$unroll4x4a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll4x4a_combine, %edi
	call	add_combiner
	movl	$unroll8x4a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll8x4a_combine, %edi
	call	add_combiner
	movl	$unroll6x6a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll6x6a_combine, %edi
	call	add_combiner
	movl	$unroll8x8a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll8x8a_combine, %edi
	call	add_combiner
	movl	$unroll10x10a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll10x10a_combine, %edi
	call	add_combiner
	movl	$unroll12x6a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll12x6a_combine, %edi
	call	add_combiner
	movl	$unroll12x12a_descr, %edx
	movl	$combine1, %esi
	movl	$unroll12x12a_combine, %edi
	call	add_combiner
	movl	$unroll8x2_descr, %edx
	movl	$combine1, %esi
	movl	$unroll8x2_combine, %edi
	call	add_combiner
	movl	$unroll8x4_descr, %edx
	movl	$combine1, %esi
	movl	$unroll8x4_combine, %edi
	call	add_combiner
	movl	$unroll8x8_descr, %edx
	movl	$combine1, %esi
	movl	$unroll8x8_combine, %edi
	call	add_combiner
	movl	$unroll9x3_descr, %edx
	movl	$combine1, %esi
	movl	$unroll9x3_combine, %edi
	call	add_combiner
	movl	$unrollx2as_descr, %edx
	movl	$combine1, %esi
	movl	$unrollx2as_combine, %edi
	call	add_combiner
	movl	$unroll2aa_descr, %edx
	movl	$combine1, %esi
	movl	$unroll2aa_combine, %edi
	call	add_combiner
	movl	$unroll3aa_descr, %edx
	movl	$combine1, %esi
	movl	$unroll3aa_combine, %edi
	call	add_combiner
	movl	$unroll4aa_descr, %edx
	movl	$combine1, %esi
	movl	$unroll4aa_combine, %edi
	call	add_combiner
	movl	$unroll6aa_descr, %edx
	movl	$combine1, %esi
	movl	$unroll6aa_combine, %edi
	call	add_combiner
	movl	$unroll8aa_descr, %edx
	movl	$combine1, %esi
	movl	$unroll8aa_combine, %edi
	call	add_combiner
	movl	$unrollv1_descr, %edx
	movl	$combine1, %esi
	movl	$unrollv1_combine, %edi
	call	add_combiner
	movl	$unrollv2_descr, %edx
	movl	$combine1, %esi
	movl	$unrollv2_combine, %edi
	call	add_combiner
	movl	$unrollv4_descr, %edx
	movl	$combine1, %esi
	movl	$unrollv4_combine, %edi
	call	add_combiner
	movl	$unrollv8_descr, %edx
	movl	$combine1, %esi
	movl	$unrollv8_combine, %edi
	call	add_combiner
	movl	$unrollv12_descr, %edx
	movl	$combine1, %esi
	movl	$unrollv12_combine, %edi
	call	add_combiner
	movl	$unrollv2a_descr, %edx
	movl	$combine1, %esi
	movl	$unrollv2a_combine, %edi
	call	add_combiner
	movl	$unrollv4a_descr, %edx
	movl	$combine1, %esi
	movl	$unrollv4a_combine, %edi
	call	add_combiner
	movl	$unrollv8a_descr, %edx
	movl	$combine1, %esi
	movl	$unrollv8a_combine, %edi
	addq	$8, %rsp		# restore stack, then tail-call the last
	jmp	add_combiner		# registration (its ret returns to caller)

# Start of frame/unwind data; content lies beyond this chunk.
.Lframe1: