# Descriptor-string labels; the string data that followed each label is not
# part of this listing.
unrollv8a_descr: unrollv4a_descr: unrollv2a_descr: unrollv12_descr:
unrollv8_descr: unrollv4_descr: unrollv2_descr: unrollv1_descr:
unroll8aa_descr: unroll6aa_descr: unroll4aa_descr: unroll3aa_descr:
unroll2aa_descr: unroll8x8_descr: unroll8x4_descr: unroll9x3_descr:
unroll8x2_descr: unroll4x2as_descr: unrollx2as_descr: unroll10x10a_descr:
unroll8x8a_descr: unroll6x6a_descr: unroll12x12a_descr: unroll12x6a_descr:
unroll8x4a_descr: unroll4x4a_descr: unroll3x3a_descr: unroll8x2a_descr:
unroll4x2a_descr: combine6_descr: unroll16_descr: unroll8_descr:
unroll4_descr: unroll3_descr: unroll2_descr: unroll16a_descr:
unroll8a_descr: unroll4a_descr: unroll3aw_descr: combine5p_descr:
combine5_descr: unroll2a_descr: combine4p_descr: combine4_descr:
combine3_descr: combine2_descr: combine1_descr:

# combine1: re-calls vec_length on every iteration; accumulates through
# memory at (%rbp), i.e. through *dest.
combine1:
        pushq %r12
        movq %rdi, %r12
        pushq %rbp
        movq %rsi, %rbp
        pushq %rbx
        xorl %ebx, %ebx
        subq $16, %rsp
        movl $1, (%rsi)
        jmp .L2
.L5:
        leaq 12(%rsp), %rdx
        movl %ebx, %esi
        movq %r12, %rdi
        incl %ebx
        call get_vec_element
        movl 12(%rsp), %eax
        imull (%rbp), %eax
        movl %eax, (%rbp)
.L2:
        movq %r12, %rdi
        call vec_length
        cmpl %ebx, %eax
        jg .L5
        addq $16, %rsp
        popq %rbx
        popq %rbp
        popq %r12
        ret

# combine2: vec_length hoisted out of the loop; still one get_vec_element
# call per element and a memory accumulator.
combine2:
        movq %rbx, -32(%rsp)
        movq %rbp, -24(%rsp)
        xorl %ebx, %ebx
        movq %r12, -16(%rsp)
        movq %r13, -8(%rsp)
        subq $40, %rsp
        movq %rsi, %rbp
        movq %rdi, %r13
        call vec_length
        movl %eax, %r12d
        movl $1, (%rbp)
        cmpl %r12d, %ebx
        jge .L13
.L15:
        leaq 4(%rsp), %rdx
        movl %ebx, %esi
        movq %r13, %rdi
        incl %ebx
        call get_vec_element
        movl 4(%rsp), %eax
        imull (%rbp), %eax
        movl %eax, (%rbp)
        cmpl %r12d, %ebx
        jl .L15
.L13:
        movq 8(%rsp), %rbx
        movq 16(%rsp), %rbp
        movq 24(%rsp), %r12
        movq 32(%rsp), %r13
        addq $40, %rsp
        ret

# combine3: direct access through get_vec_start; the accumulator is still
# read and written through *dest on each iteration.
combine3:
        pushq %r12
        pushq %rbp
        movq %rsi, %rbp
        pushq %rbx
        movq %rdi, %rbx
        call vec_length
        movq %rbx, %rdi
        movl %eax, %r12d
        call get_vec_start
        testl %r12d, %r12d
        movl $1, (%rbp)
        jle .L22
        movq %rax, %rcx
        movl %r12d, %edx
.L20:
        movl (%rcx), %eax
        addq $4, %rcx
        imull (%rbp), %eax
        decl %edx
        movl %eax, (%rbp)
        jne .L20
.L22:
        popq %rbx
        popq %rbp
        popq %r12
        ret

# combine4: accumulator kept in %ecx; stored to *dest once at the end.
combine4:
        movq %rbx, -24(%rsp)
        movq %rbp, -16(%rsp)
        movq %rdi, %rbx
        movq %r12, -8(%rsp)
        subq $24, %rsp
        movq %rsi, %r12
        call vec_length
        movq %rbx, %rdi
        movl %eax, %ebp
        call get_vec_start
        testl %ebp, %ebp
        movl $1, %ecx
        jle .L29
        movq %rax, %rdx
        movl %ebp, %eax
.L27:
        imull (%rdx), %ecx
        addq $4, %rdx
        decl %eax
        jne .L27
.L29:
        movl %ecx, (%r12)
        movq (%rsp), %rbx
        movq 8(%rsp), %rbp
        movq 16(%rsp), %r12
        addq $24, %rsp
        ret

# combine4p: pointer-based variant of combine4; %rax runs up to the end
# pointer held in %rcx.
combine4p:
        movq %rbx, -24(%rsp)
        movq %rbp, -16(%rsp)
        movq %rsi, %rbp
        movq %r12, -8(%rsp)
        subq $24, %rsp
        movq %rdi, %r12
        call vec_length
        movl %eax, %ebx
        movq %r12, %rdi
        movslq %ebx,%rbx
        call get_vec_start
        leaq (%rax,%rbx,4), %rcx
        movl $1, %edx
        jmp .L37
.L38:
        imull (%rax), %edx
        addq $4, %rax
.L37:
        cmpq %rcx, %rax
        jb .L38
        movl %edx, (%rbp)
        movq (%rsp), %rbx
        movq 8(%rsp), %rbp
        movq 16(%rsp), %r12
        addq $24, %rsp
        ret

# unroll2a_combine: 2x1 loop unrolling, single accumulator in %ecx, with a
# scalar fix-up loop at .L47 for the leftover elements.
unroll2a_combine:
        pushq %r13
        movq %rsi, %r13
        pushq %r12
        pushq %rbp
        pushq %rbx
        movq %rdi, %rbx
        subq $8, %rsp
        call vec_length
        movl %eax, %r12d
        movq %rbx, %rdi
        leal -1(%r12), %ebp
        call get_vec_start
        movl $1, %ecx
        movq %rax, %rsi
        xorl %edx, %edx
        jmp .L52
.L53:
        movslq %edx,%rax
        addl $2, %edx
        imull (%rsi,%rax,4), %ecx
        imull 4(%rsi,%rax,4), %ecx
.L52:
        cmpl %ebp, %edx
        jl .L53
        cmpl %r12d, %edx
        jge .L51
        movslq %edx,%rax
        subl %edx, %r12d
        leaq (%rsi,%rax,4), %rax
        movl %r12d, %edx
.L47:
        imull (%rax), %ecx
        addq $4, %rax
        decl %edx
        jne .L47
.L51:
        movl %ecx, (%r13)
        addq $8, %rsp
        popq %rbx
        popq %rbp
        popq %r12
        popq %r13
        ret
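
The functions above are recognizably compiler output for CS:APP-style "combine"
benchmarks: an integer product over a vector (note the imull and the $1
initialization), with vec_length hoisted, bounds-checked access replaced by
direct data access, and the accumulator moved from memory into a register as
the variants progress. As a reading aid, here is a plausible reconstruction of
the C source for combine1, combine4, and unroll2a_combine. The vec_ptr
interface, data_t = int, and all identifier names are assumptions inferred
from the calls in the assembly, not taken from this listing.

/* Assumed CS:APP-style vector interface (reconstruction, not from this file). */
typedef int data_t;

typedef struct {
    int len;
    data_t *data;
} vec, *vec_ptr;

int vec_length(vec_ptr v) { return v->len; }
data_t *get_vec_start(vec_ptr v) { return v->data; }

int get_vec_element(vec_ptr v, int idx, data_t *dest)
{
    if (idx < 0 || idx >= v->len)
        return 0;
    *dest = v->data[idx];
    return 1;
}

/* combine1: re-calls vec_length every iteration; accumulates through *dest. */
void combine1(vec_ptr v, data_t *dest)
{
    *dest = 1;
    for (int i = 0; i < vec_length(v); i++) {
        data_t val;
        get_vec_element(v, i, &val);
        *dest = *dest * val;
    }
}

/* combine4: length hoisted, direct element access, register accumulator. */
void combine4(vec_ptr v, data_t *dest)
{
    int length = vec_length(v);
    data_t *data = get_vec_start(v);
    data_t acc = 1;
    for (int i = 0; i < length; i++)
        acc = acc * data[i];
    *dest = acc;
}

/* unroll2a_combine: 2x1 unrolling, one accumulator, scalar fix-up loop. */
void unroll2a_combine(vec_ptr v, data_t *dest)
{
    int length = vec_length(v);
    int limit = length - 1;
    data_t *data = get_vec_start(v);
    data_t acc = 1;
    int i;
    for (i = 0; i < limit; i += 2)        /* two elements per iteration */
        acc = (acc * data[i]) * data[i + 1];
    for (; i < length; i++)               /* finish any remaining element */
        acc = acc * data[i];
    *dest = acc;
}
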
# combine5: 3x1 loop unrolling (offsets 0, 4, 8), single accumulator in %ecx,
# fix-up loop at .L62.
combine5:
        pushq %r13
        movq %rsi, %r13
        pushq %r12
        pushq %rbp
        pushq %rbx
        movq %rdi, %rbx
        subq $8, %rsp
        call vec_length
        movl %eax, %r12d
        movq %rbx, %rdi
        leal -2(%r12), %ebp
        call get_vec_start
        movl $1, %ecx
        movq %rax, %rsi
        xorl %edx, %edx
        jmp .L67
.L68:
        movslq %edx,%rax
        addl $3, %edx
        imull (%rsi,%rax,4), %ecx
        imull 4(%rsi,%rax,4), %ecx
        imull 8(%rsi,%rax,4), %ecx
.L67:
        cmpl %ebp, %edx
        jl .L68
        cmpl %r12d, %edx
        jge .L66
        movslq %edx,%rax
        subl %edx, %r12d
        leaq (%rsi,%rax,4), %rax
        movl %r12d, %edx
.L62:
        imull (%rax), %ecx
        addq $4, %rax
        decl %edx
        jne .L62
.L66:
        movl %ecx, (%r13)
        addq $8, %rsp
        popq %rbx
        popq %rbp
        popq %r12
        popq %r13
        ret

# combine5p: pointer-based 3x1 unrolling; the main loop stops 8 bytes short
# of the end pointer, with a fix-up loop at .L85.
combine5p:
        pushq %r12
        movq %rsi, %r12
        pushq %rbp
        pushq %rbx
        movq %rdi, %rbx
        call get_vec_start
        movq %rbx, %rdi
        movq %rax, %rbp
        call vec_length
        cltq
        leaq (%rbp,%rax,4), %rdx
        movl $1, %eax
        leaq -8(%rdx), %rcx
        jmp .L82
.L84:
        imull (%rbp), %eax
        imull 4(%rbp), %eax
        imull 8(%rbp), %eax
        addq $12, %rbp
.L82:
        cmpq %rcx, %rbp
        jb .L84
        cmpq %rdx, %rbp
        jae .L81
.L85:
        imull (%rbp), %eax
        addq $4, %rbp
        cmpq %rdx, %rbp
        jb .L85
.L81:
        popq %rbx
        popq %rbp
        movl %eax, (%r12)
        popq %r12
        ret

# unroll3aw_combine: 3x1 unrolling with the index advanced before the last
# two multiplies (hence the negative displacements).
unroll3aw_combine:
        pushq %r13
        movq %rsi, %r13
        pushq %r12
        pushq %rbp
        pushq %rbx
        movq %rdi, %rbx
        subq $8, %rsp
        call vec_length
        movl %eax, %r12d
        movq %rbx, %rdi
        leal -1(%r12), %ebp
        call get_vec_start
        xorl %edx, %edx
        movq %rax, %rsi
        movl $1, %ecx
        cmpl %ebp, %edx
        jge .L95
        xorl %eax, %eax
.L89:
        imull (%rsi,%rax,4), %ecx
        addl $3, %edx
        movslq %edx,%rax
        imull -8(%rsi,%rax,4), %ecx
        imull -4(%rsi,%rax,4), %ecx
        cmpl %ebp, %edx
        jl .L89
.L95:
        cmpl %r12d, %edx
        jge .L97
        movslq %edx,%rax
        subl %edx, %r12d
        leaq (%rsi,%rax,4), %rax
        movl %r12d, %edx
.L93:
        imull (%rax), %ecx
        addq $4, %rax
        decl %edx
        jne .L93
.L97:
        movl %ecx, (%r13)
        addq $8, %rsp
        popq %rbx
        popq %rbp
        popq %r12
        popq %r13
        ret

# unroll4a_combine: 4x1 unrolling; the four products are staged through
# %eax and %edi rather than chained through one register.
unroll4a_combine:
        pushq %r13
        movq %rsi, %r13
        pushq %r12
        pushq %rbp
        pushq %rbx
        movq %rdi, %rbx
        subq $8, %rsp
        call vec_length
        movl %eax, %r12d
        movq %rbx, %rdi
        leal -3(%r12), %ebp
        call get_vec_start
        movl $1, %edi
        movq %rax, %rsi
        xorl %ecx, %ecx
        jmp .L111
.L112:
        movslq %ecx,%rdx
        addl $4, %ecx
        movl (%rsi,%rdx,4), %eax
        imull %edi, %eax
        movl 8(%rsi,%rdx,4), %edi
        imull 4(%rsi,%rdx,4), %eax
        imull %eax, %edi
        imull 12(%rsi,%rdx,4), %edi
.L111:
        cmpl %ebp, %ecx
        jl .L112
        cmpl %r12d, %ecx
        jge .L110
        movslq %ecx,%rax
        subl %ecx, %r12d
        leaq (%rsi,%rax,4), %rax
        movl %r12d, %ecx
.L106:
        imull (%rax), %edi
        addq $4, %rax
        decl %ecx
        jne .L106
.L110:
        movl %edi, (%r13)
        addq $8, %rsp
        popq %rbx
        popq %rbp
        popq %r12
        popq %r13
        ret

# unroll8a_combine: 8x1 analog of unroll4a_combine.
unroll8a_combine:
        pushq %r13
        movq %rsi, %r13
        pushq %r12
        pushq %rbp
        pushq %rbx
        movq %rdi, %rbx
        subq $8, %rsp
        call vec_length
        movl %eax, %r12d
        movq %rbx, %rdi
        leal -7(%r12), %ebp
        call get_vec_start
        movl $1, %edi
        movq %rax, %rcx
        xorl %esi, %esi
        jmp .L126
.L127:
        movslq %esi,%rdx
        addl $8, %esi
        movl (%rcx,%rdx,4), %eax
        imull %edi, %eax
        movl 24(%rcx,%rdx,4), %edi
        imull 4(%rcx,%rdx,4), %eax
        imull 8(%rcx,%rdx,4), %eax
        imull 12(%rcx,%rdx,4), %eax
        imull 16(%rcx,%rdx,4), %eax
        imull 20(%rcx,%rdx,4), %eax
        imull %eax, %edi
        imull 28(%rcx,%rdx,4), %edi
.L126:
        cmpl %ebp, %esi
        jl .L127
        cmpl %r12d, %esi
        jge .L125
        movslq %esi,%rax
        subl %esi, %r12d
        leaq (%rcx,%rax,4), %rax
        movl %r12d, %esi
.L121:
        imull (%rax), %edi
        addq $4, %rax
        decl %esi
        jne .L121
.L125:
        movl %edi, (%r13)
        addq $8, %rsp
        popq %rbx
        popq %rbp
        popq %r12
        popq %r13
        ret

# unroll16a_combine: 16x1 analog of unroll4a_combine.
unroll16a_combine:
        pushq %r13
        movq %rsi, %r13
        pushq %r12
        pushq %rbp
        pushq %rbx
        movq %rdi, %rbx
        subq $8, %rsp
        call vec_length
        movl %eax, %r12d
        movq %rbx, %rdi
        leal -15(%r12), %ebp
        call get_vec_start
        movl $1, %esi
        movq %rax, %rcx
        xorl %edi, %edi
        jmp .L141
.L142:
        movslq %edi,%rdx
        addl $16, %edi
        movl (%rcx,%rdx,4), %eax
imull %esi, %eax movl 56(%rcx,%rdx,4), %esi imull 4(%rcx,%rdx,4), %eax imull 8(%rcx,%rdx,4), %eax imull 12(%rcx,%rdx,4), %eax imull 16(%rcx,%rdx,4), %eax imull 20(%rcx,%rdx,4), %eax imull 24(%rcx,%rdx,4), %eax imull 28(%rcx,%rdx,4), %eax imull 32(%rcx,%rdx,4), %eax imull 36(%rcx,%rdx,4), %eax imull 40(%rcx,%rdx,4), %eax imull 44(%rcx,%rdx,4), %eax imull 48(%rcx,%rdx,4), %eax imull 52(%rcx,%rdx,4), %eax imull %eax, %esi imull 60(%rcx,%rdx,4), %esi .L141: cmpl %ebp, %edi jl .L142 cmpl %r12d, %edi jge .L140 movslq %edi,%rax subl %edi, %r12d leaq (%rcx,%rax,4), %rax movl %r12d, %edi .L136: imull (%rax), %esi addq $4, %rax decl %edi jne .L136 .L140: movl %esi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll2_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movq %r12, %rdi movl %eax, %ebx call get_vec_start movq %rax, %rdx movl %ebx, %eax movl %ebx, %ecx shrl $31, %eax leal (%rbx,%rax), %eax movslq %ebx,%rbx andl $-2, %eax subl %eax, %ecx movl %ecx, %eax leaq (%rdx,%rbx,4), %rcx movslq %eax,%rsi leaq 0(,%rsi,4), %rax subq %rax, %rcx movl $1, %eax jmp .L154 .L156: imull (%rdx), %eax imull 4(%rdx), %eax addq $8, %rdx .L154: cmpq %rcx, %rdx jb .L156 leaq (%rcx,%rsi,4), %rcx jmp .L155 .L157: imull (%rdx), %eax addq $4, %rdx .L155: cmpq %rcx, %rdx jb .L157 movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll3_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -8(%rax,%rbx,4), %rcx movl $1, %edx jmp .L169 .L171: imull (%rax), %edx imull 4(%rax), %edx imull 8(%rax), %edx addq $12, %rax .L169: cmpq %rcx, %rax jb .L171 addq $8, %rcx jmp .L170 .L172: imull (%rax), %edx addq $4, %rax .L170: cmpq %rcx, %rax jb .L172 movl %edx, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll4_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -12(%rax,%rbx,4), %rcx movl $1, %edx jmp .L184 .L186: imull (%rax), %edx imull 4(%rax), %edx imull 8(%rax), %edx imull 12(%rax), %edx addq $16, %rax .L184: cmpq %rcx, %rax jb .L186 addq $12, %rcx jmp .L185 .L187: imull (%rax), %edx addq $4, %rax .L185: cmpq %rcx, %rax jb .L187 movl %edx, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll8_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movq %r12, %rdi movl %eax, %ebx call get_vec_start movq %rax, %rdx leal 7(%rbx), %eax cmpl $-1, %ebx movl %ebx, %ecx cmovg %ebx, %eax movslq %ebx,%rbx andl $-8, %eax subl %eax, %ecx movl %ecx, %eax leaq (%rdx,%rbx,4), %rcx movslq %eax,%rsi leaq 0(,%rsi,4), %rax subq %rax, %rcx movl $1, %eax jmp .L200 .L202: imull (%rdx), %eax imull 4(%rdx), %eax imull 8(%rdx), %eax imull 12(%rdx), %eax imull 16(%rdx), %eax imull 20(%rdx), %eax imull 24(%rdx), %eax imull 28(%rdx), %eax addq $32, %rdx .L200: cmpq %rcx, %rdx jb .L202 leaq (%rcx,%rsi,4), %rcx jmp .L201 .L203: imull (%rdx), %eax addq $4, %rdx .L201: cmpq %rcx, %rdx jb .L203 movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll16_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movq %r12, %rdi movl %eax, %ebx call get_vec_start movq %rax, %rdx leal 15(%rbx), %eax cmpl $-1, %ebx movl %ebx, %ecx cmovg %ebx, %eax movslq %ebx,%rbx andl $-16, %eax subl %eax, %ecx movl %ecx, %eax leaq (%rdx,%rbx,4), %rcx movslq %eax,%rsi leaq 0(,%rsi,4), %rax subq %rax, %rcx movl $1, %eax jmp .L216 .L218: imull 
(%rdx), %eax imull 4(%rdx), %eax imull 8(%rdx), %eax imull 12(%rdx), %eax imull 16(%rdx), %eax imull 20(%rdx), %eax imull 24(%rdx), %eax imull 28(%rdx), %eax imull 32(%rdx), %eax imull 36(%rdx), %eax imull 40(%rdx), %eax imull 44(%rdx), %eax imull 48(%rdx), %eax imull 52(%rdx), %eax imull 56(%rdx), %eax imull 60(%rdx), %eax addq $64, %rdx .L216: cmpq %rcx, %rdx jb .L218 leaq (%rcx,%rsi,4), %rcx jmp .L217 .L219: imull (%rdx), %eax addq $4, %rdx .L217: cmpq %rcx, %rdx jb .L219 movl %eax, (%rbp) popq %rbx popq %rbp popq %r12 ret combine6: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -1(%r12), %ebp call get_vec_start movl $1, %esi movq %rax, %rcx movl $1, %edi xorl %edx, %edx jmp .L233 .L234: movslq %edx,%rax addl $2, %edx imull (%rcx,%rax,4), %esi imull 4(%rcx,%rax,4), %edi .L233: cmpl %ebp, %edx jl .L234 cmpl %r12d, %edx jge .L232 movslq %edx,%rax subl %edx, %r12d leaq (%rcx,%rax,4), %rax movl %r12d, %edx .L228: imull (%rax), %esi addq $4, %rax decl %edx jne .L228 .L232: imull %edi, %esi movl %esi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll4x2a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -3(%r12), %ebp call get_vec_start movl $1, %esi movq %rax, %rcx movl $1, %edi xorl %edx, %edx jmp .L248 .L249: movslq %edx,%rax addl $4, %edx imull (%rcx,%rax,4), %esi imull 4(%rcx,%rax,4), %edi imull 8(%rcx,%rax,4), %esi imull 12(%rcx,%rax,4), %edi .L248: cmpl %ebp, %edx jl .L249 cmpl %r12d, %edx jge .L247 movslq %edx,%rax subl %edx, %r12d leaq (%rcx,%rax,4), %rax movl %r12d, %edx .L243: imull (%rax), %esi addq $4, %rax decl %edx jne .L243 .L247: imull %edi, %esi movl %esi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll8x2a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -7(%r12), %ebp call get_vec_start movl $1, %ecx movq %rax, %rdx movl $1, %edi xorl %esi, %esi jmp .L263 .L264: movslq %esi,%rax addl $8, %esi imull (%rdx,%rax,4), %ecx imull 4(%rdx,%rax,4), %edi imull 8(%rdx,%rax,4), %ecx imull 12(%rdx,%rax,4), %edi imull 16(%rdx,%rax,4), %ecx imull 20(%rdx,%rax,4), %edi imull 24(%rdx,%rax,4), %ecx imull 28(%rdx,%rax,4), %edi .L263: cmpl %ebp, %esi jl .L264 cmpl %r12d, %esi jge .L262 movslq %esi,%rax subl %esi, %r12d leaq (%rdx,%rax,4), %rax movl %r12d, %esi .L258: imull (%rax), %ecx addq $4, %rax decl %esi jne .L258 .L262: imull %edi, %ecx movl %ecx, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll3x3a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -2(%r12), %ebp call get_vec_start movl $1, %esi movq %rax, %rcx movl $1, %edi movl $1, %r8d xorl %edx, %edx jmp .L278 .L279: movslq %edx,%rax addl $3, %edx imull (%rcx,%rax,4), %esi imull 4(%rcx,%rax,4), %edi imull 8(%rcx,%rax,4), %r8d .L278: cmpl %ebp, %edx jl .L279 cmpl %r12d, %edx jge .L277 movslq %edx,%rax subl %edx, %r12d leaq (%rcx,%rax,4), %rax movl %r12d, %edx .L273: imull (%rax), %esi addq $4, %rax decl %edx jne .L273 .L277: imull %edi, %esi imull %r8d, %esi movl %esi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll4x4a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, 
%r12d movq %rbx, %rdi leal -3(%r12), %ebp call get_vec_start movl $1, %esi movq %rax, %rcx movl $1, %edi movl $1, %r8d movl $1, %r9d xorl %edx, %edx jmp .L293 .L294: movslq %edx,%rax addl $4, %edx imull (%rcx,%rax,4), %esi imull 4(%rcx,%rax,4), %edi imull 8(%rcx,%rax,4), %r8d imull 12(%rcx,%rax,4), %r9d .L293: cmpl %ebp, %edx jl .L294 cmpl %r12d, %edx jge .L292 movslq %edx,%rax subl %edx, %r12d leaq (%rcx,%rax,4), %rax movl %r12d, %edx .L288: imull (%rax), %esi addq $4, %rax decl %edx jne .L288 .L292: imull %edi, %esi imull %r8d, %esi imull %r9d, %esi movl %esi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll8x4a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -7(%r12), %ebp call get_vec_start movl $1, %esi movq %rax, %rdx movl $1, %edi movl $1, %r8d movl $1, %r9d xorl %ecx, %ecx jmp .L308 .L309: movslq %ecx,%rax addl $8, %ecx imull (%rdx,%rax,4), %esi imull 4(%rdx,%rax,4), %edi imull 8(%rdx,%rax,4), %r8d imull 12(%rdx,%rax,4), %r9d imull 16(%rdx,%rax,4), %esi imull 20(%rdx,%rax,4), %edi imull 24(%rdx,%rax,4), %r8d imull 28(%rdx,%rax,4), %r9d .L308: cmpl %ebp, %ecx jl .L309 cmpl %r12d, %ecx jge .L307 movslq %ecx,%rax subl %ecx, %r12d leaq (%rdx,%rax,4), %rax movl %r12d, %ecx .L303: imull (%rax), %esi addq $4, %rax decl %ecx jne .L303 .L307: imull %edi, %esi imull %r8d, %esi imull %r9d, %esi movl %esi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll12x6a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -11(%r12), %ebp call get_vec_start movl $1, %esi movq %rax, %rdx movl $1, %r11d movl $1, %edi movl $1, %r9d movl $1, %r8d movl $1, %r10d xorl %ecx, %ecx jmp .L323 .L324: movslq %ecx,%rax addl $12, %ecx imull (%rdx,%rax,4), %esi imull 4(%rdx,%rax,4), %r11d imull 8(%rdx,%rax,4), %edi imull 12(%rdx,%rax,4), %r9d imull 16(%rdx,%rax,4), %r8d imull 20(%rdx,%rax,4), %r10d imull 24(%rdx,%rax,4), %esi imull 28(%rdx,%rax,4), %r11d imull 32(%rdx,%rax,4), %edi imull 36(%rdx,%rax,4), %r9d imull 40(%rdx,%rax,4), %r8d imull 44(%rdx,%rax,4), %r10d .L323: cmpl %ebp, %ecx jl .L324 cmpl %r12d, %ecx jge .L322 movslq %ecx,%rax subl %ecx, %r12d leaq (%rdx,%rax,4), %rax movl %r12d, %ecx .L318: imull (%rax), %esi addq $4, %rax decl %ecx jne .L318 .L322: imull %r11d, %esi imull %r9d, %edi imull %r10d, %r8d imull %edi, %esi imull %r8d, %esi movl %esi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll12x12a_combine: pushq %r15 movl $1, %r15d pushq %r14 movl $1, %r14d pushq %r13 movl $1, %r13d pushq %r12 movl $1, %r12d pushq %rbp movl $1, %ebp pushq %rbx movq %rdi, %rbx subq $24, %rsp movq %rsi, 16(%rsp) call vec_length movq %rbx, %rdi movl %eax, 12(%rsp) subl $11, %eax movl %eax, 8(%rsp) movl $1, %ebx call get_vec_start movl $1, %esi movq %rax, %rdx movl $1, %edi movl $1, %r8d movl $1, %r9d movl $1, %r10d movl $1, %r11d xorl %ecx, %ecx jmp .L338 .L339: movslq %ecx,%rax addl $12, %ecx imull (%rdx,%rax,4), %esi imull 24(%rdx,%rax,4), %r9d imull 4(%rdx,%rax,4), %r15d imull 28(%rdx,%rax,4), %r12d imull 8(%rdx,%rax,4), %edi imull 32(%rdx,%rax,4), %r10d imull 12(%rdx,%rax,4), %ebx imull 36(%rdx,%rax,4), %r13d imull 16(%rdx,%rax,4), %r8d imull 40(%rdx,%rax,4), %r11d imull 20(%rdx,%rax,4), %ebp imull 44(%rdx,%rax,4), %r14d .L338: cmpl 8(%rsp), %ecx jl .L339 cmpl 12(%rsp), %ecx jge .L337 movslq %ecx,%rax leaq (%rdx,%rax,4), %rax movl 12(%rsp), %edx subl 
%ecx, %edx movl %edx, %ecx .L333: imull (%rax), %esi addq $4, %rax decl %ecx jne .L333 .L337: imull %r15d, %esi movq 16(%rsp), %rax imull %ebx, %edi imull %ebp, %r8d imull %edi, %esi imull %r12d, %r9d imull %r8d, %esi imull %r13d, %r10d imull %r9d, %esi imull %r14d, %r11d imull %r10d, %esi imull %r11d, %esi movl %esi, (%rax) addq $24, %rsp popq %rbx popq %rbp popq %r12 popq %r13 popq %r14 popq %r15 ret unroll6x6a_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -5(%r12), %ebp call get_vec_start movl $1, %esi movq %rax, %rdx movl $1, %edi movl $1, %r8d movl $1, %r9d movl $1, %r10d movl $1, %r11d xorl %ecx, %ecx jmp .L353 .L354: movslq %ecx,%rax addl $6, %ecx imull (%rdx,%rax,4), %esi imull 4(%rdx,%rax,4), %edi imull 8(%rdx,%rax,4), %r8d imull 12(%rdx,%rax,4), %r9d imull 16(%rdx,%rax,4), %r10d imull 20(%rdx,%rax,4), %r11d .L353: cmpl %ebp, %ecx jl .L354 cmpl %r12d, %ecx jge .L352 movslq %ecx,%rax subl %ecx, %r12d leaq (%rdx,%rax,4), %rax movl %r12d, %ecx .L348: imull (%rax), %esi addq $4, %rax decl %ecx jne .L348 .L352: imull %edi, %esi imull %r8d, %esi imull %r9d, %esi imull %r10d, %esi imull %r11d, %esi movl %esi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll8x8a_combine: pushq %r14 movq %rsi, %r14 pushq %r13 pushq %r12 pushq %rbp movl $1, %ebp pushq %rbx movq %rdi, %rbx call vec_length movl %eax, %r13d movq %rbx, %rdi movl $1, %ebx leal -7(%r13), %r12d call get_vec_start movl $1, %esi movq %rax, %rdx movl $1, %edi movl $1, %r8d movl $1, %r9d movl $1, %r10d movl $1, %r11d xorl %ecx, %ecx jmp .L368 .L369: movslq %ecx,%rax addl $8, %ecx imull (%rdx,%rax,4), %esi imull 4(%rdx,%rax,4), %edi imull 8(%rdx,%rax,4), %r8d imull 12(%rdx,%rax,4), %r9d imull 16(%rdx,%rax,4), %r10d imull 20(%rdx,%rax,4), %r11d imull 24(%rdx,%rax,4), %ebx imull 28(%rdx,%rax,4), %ebp .L368: cmpl %r12d, %ecx jl .L369 cmpl %r13d, %ecx jge .L367 movslq %ecx,%rax subl %ecx, %r13d leaq (%rdx,%rax,4), %rax movl %r13d, %ecx .L363: imull (%rax), %esi addq $4, %rax decl %ecx jne .L363 .L367: imull %edi, %esi imull %r8d, %esi imull %r9d, %esi imull %r10d, %esi imull %r11d, %esi imull %ebx, %esi popq %rbx imull %ebp, %esi popq %rbp popq %r12 popq %r13 movl %esi, (%r14) popq %r14 ret unroll10x10a_combine: pushq %r15 pushq %r14 pushq %r13 movl $1, %r13d pushq %r12 movl $1, %r12d pushq %rbp movl $1, %ebp pushq %rbx movq %rdi, %rbx subq $8, %rsp movq %rsi, (%rsp) call vec_length movl %eax, %r15d movq %rbx, %rdi movl $1, %ebx leal -9(%r15), %r14d call get_vec_start movl $1, %esi movq %rax, %rdx movl $1, %edi movl $1, %r8d movl $1, %r9d movl $1, %r10d movl $1, %r11d xorl %ecx, %ecx jmp .L383 .L384: movslq %ecx,%rax addl $10, %ecx imull (%rdx,%rax,4), %esi imull 4(%rdx,%rax,4), %edi imull 8(%rdx,%rax,4), %r8d imull 12(%rdx,%rax,4), %r9d imull 16(%rdx,%rax,4), %r10d imull 20(%rdx,%rax,4), %r11d imull 24(%rdx,%rax,4), %ebx imull 28(%rdx,%rax,4), %ebp imull 32(%rdx,%rax,4), %r12d imull 36(%rdx,%rax,4), %r13d .L383: cmpl %r14d, %ecx jl .L384 cmpl %r15d, %ecx jge .L382 movslq %ecx,%rax subl %ecx, %r15d leaq (%rdx,%rax,4), %rax movl %r15d, %ecx .L378: imull (%rax), %esi addq $4, %rax decl %ecx jne .L378 .L382: imull %edi, %esi movq (%rsp), %rax imull %r8d, %esi imull %r9d, %esi imull %r10d, %esi imull %r11d, %esi imull %ebx, %esi imull %ebp, %esi imull %r12d, %esi imull %r13d, %esi movl %esi, (%rax) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 popq %r14 popq %r15 ret unrollx2as_combine: pushq %r13 movq %rsi, 
%r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d shrl $31, %eax movq %rbx, %rdi leal (%r12,%rax), %ebp call get_vec_start sarl %ebp movq %rax, %rsi movl $1, %edi movslq %ebp,%rax movl $1, %ecx xorl %edx, %edx leaq (%rsi,%rax,4), %r8 jmp .L398 .L399: movslq %edx,%rax incl %edx imull (%rsi,%rax,4), %edi imull (%r8,%rax,4), %ecx .L398: cmpl %ebp, %edx jl .L399 leal (%rbp,%rbp), %edx cmpl %r12d, %edx jge .L397 movslq %edx,%rax subl %edx, %r12d leaq (%rsi,%rax,4), %rax movl %r12d, %edx .L393: imull (%rax), %ecx addq $4, %rax decl %edx jne .L393 .L397: imull %ecx, %edi movl %edi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll4x2as_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d shrl $31, %eax movq %rbx, %rdi leal (%r12,%rax), %ebp call get_vec_start sarl %ebp movq %rax, %rsi movl $1, %edi movslq %ebp,%rax movl $1, %ecx xorl %edx, %edx leaq (%rsi,%rax,4), %r8 jmp .L413 .L414: movslq %edx,%rax incl %edx imull (%rsi,%rax,4), %edi imull (%r8,%rax,4), %ecx .L413: cmpl %ebp, %edx jl .L414 leal (%rbp,%rbp), %edx cmpl %r12d, %edx jge .L412 movslq %edx,%rax subl %edx, %r12d leaq (%rsi,%rax,4), %rax movl %r12d, %edx .L408: imull (%rax), %ecx addq $4, %rax decl %edx jne .L408 .L412: imull %ecx, %edi movl %edi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll8x2_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -28(%rax,%rbx,4), %rsi movl $1, %edx movl $1, %ecx jmp .L426 .L428: imull (%rax), %edx imull 4(%rax), %ecx imull 8(%rax), %edx imull 12(%rax), %ecx imull 16(%rax), %edx imull 20(%rax), %ecx imull 24(%rax), %edx imull 28(%rax), %ecx addq $32, %rax .L426: cmpq %rsi, %rax jb .L428 addq $28, %rsi jmp .L427 .L429: imull (%rax), %edx addq $4, %rax .L427: cmpq %rsi, %rax jb .L429 imull %ecx, %edx movl %edx, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll9x3_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -32(%rax,%rbx,4), %rdi movl $1, %edx movl $1, %ecx movl $1, %esi jmp .L441 .L443: imull (%rax), %edx imull 4(%rax), %ecx imull 8(%rax), %esi imull 12(%rax), %edx imull 16(%rax), %ecx imull 20(%rax), %esi imull 24(%rax), %edx imull 28(%rax), %ecx imull 32(%rax), %esi addq $36, %rax .L441: cmpq %rdi, %rax jb .L443 addq $32, %rdi jmp .L442 .L444: imull (%rax), %edx addq $4, %rax .L442: cmpq %rdi, %rax jb .L444 imull %ecx, %edx imull %esi, %edx movl %edx, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll8x4_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq %ebx,%rbx call get_vec_start leaq -28(%rax,%rbx,4), %rcx movl $1, %edx movl $1, %esi movl $1, %edi movl $1, %r8d jmp .L456 .L458: imull (%rax), %edx imull 4(%rax), %esi imull 8(%rax), %edi imull 12(%rax), %r8d imull 16(%rax), %edx imull 20(%rax), %esi imull 24(%rax), %edi imull 28(%rax), %r8d addq $32, %rax .L456: cmpq %rcx, %rax jb .L458 addq $28, %rcx jmp .L457 .L459: imull (%rax), %edx addq $4, %rax .L457: cmpq %rcx, %rax jb .L459 imull %esi, %edx imull %edi, %edx imull %r8d, %edx movl %edx, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll8x8_combine: pushq %r12 movq %rdi, %r12 pushq %rbp movq %rsi, %rbp pushq %rbx call vec_length movl %eax, %ebx movq %r12, %rdi movslq 
%ebx,%rbx call get_vec_start leaq -28(%rax,%rbx,4), %rcx movl $1, %edx movl $1, %esi movl $1, %edi movl $1, %r8d movl $1, %r9d movl $1, %r10d movl $1, %r11d movl $1, %ebx jmp .L471 .L473: imull (%rax), %edx imull 4(%rax), %esi imull 8(%rax), %edi imull 12(%rax), %r8d imull 16(%rax), %r9d imull 20(%rax), %r10d imull 24(%rax), %r11d imull 28(%rax), %ebx addq $32, %rax .L471: cmpq %rcx, %rax jb .L473 addq $28, %rcx jmp .L472 .L474: imull (%rax), %edx addq $4, %rax .L472: cmpq %rcx, %rax jb .L474 imull %esi, %edx imull %edi, %edx imull %r8d, %edx imull %r9d, %edx imull %r10d, %edx imull %r11d, %edx imull %ebx, %edx movl %edx, (%rbp) popq %rbx popq %rbp popq %r12 ret unroll2aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -1(%r12), %ebp call get_vec_start movl $1, %edi movq %rax, %rsi xorl %ecx, %ecx jmp .L488 .L489: movslq %ecx,%rdx addl $2, %ecx movl 4(%rsi,%rdx,4), %eax imull (%rsi,%rdx,4), %eax imull %eax, %edi .L488: cmpl %ebp, %ecx jl .L489 cmpl %r12d, %ecx jge .L487 movslq %ecx,%rax subl %ecx, %r12d leaq (%rsi,%rax,4), %rax movl %r12d, %ecx .L483: imull (%rax), %edi addq $4, %rax decl %ecx jne .L483 .L487: movl %edi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll3aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -2(%r12), %ebp call get_vec_start movl $1, %edi movq %rax, %rsi xorl %ecx, %ecx jmp .L503 .L504: movslq %ecx,%rdx addl $3, %ecx movl 4(%rsi,%rdx,4), %eax imull (%rsi,%rdx,4), %eax imull 8(%rsi,%rdx,4), %eax imull %eax, %edi .L503: cmpl %ebp, %ecx jl .L504 cmpl %r12d, %ecx jge .L502 movslq %ecx,%rax subl %ecx, %r12d leaq (%rsi,%rax,4), %rax movl %r12d, %ecx .L498: imull (%rax), %edi addq $4, %rax decl %ecx jne .L498 .L502: movl %edi, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll4aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -3(%r12), %ebp call get_vec_start movl $1, %r8d movq %rax, %rdi xorl %esi, %esi jmp .L518 .L519: movslq %esi,%rax addl $4, %esi movl 4(%rdi,%rax,4), %ecx movl 12(%rdi,%rax,4), %edx imull (%rdi,%rax,4), %ecx imull 8(%rdi,%rax,4), %edx imull %edx, %ecx imull %ecx, %r8d .L518: cmpl %ebp, %esi jl .L519 cmpl %r12d, %esi jge .L517 movslq %esi,%rax subl %esi, %r12d leaq (%rdi,%rax,4), %rax movl %r12d, %esi .L513: imull (%rax), %r8d addq $4, %rax decl %esi jne .L513 .L517: movl %r8d, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll6aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -5(%r12), %ebp call get_vec_start movl $1, %r9d movq %rax, %rdi xorl %r8d, %r8d jmp .L533 .L534: movslq %r8d,%rcx addl $6, %r8d movl 4(%rdi,%rcx,4), %eax movl 12(%rdi,%rcx,4), %esi imull (%rdi,%rcx,4), %eax imull 8(%rdi,%rcx,4), %esi movl 20(%rdi,%rcx,4), %edx imull 16(%rdi,%rcx,4), %edx imull %esi, %eax imull %edx, %eax imull %eax, %r9d .L533: cmpl %ebp, %r8d jl .L534 cmpl %r12d, %r8d jge .L532 movslq %r8d,%rax subl %r8d, %r12d leaq (%rdi,%rax,4), %rax movl %r12d, %r8d .L528: imull (%rax), %r9d addq $4, %rax decl %r8d jne .L528 .L532: movl %r9d, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unroll8aa_combine: pushq %r13 movq %rsi, %r13 pushq %r12 pushq %rbp pushq %rbx movq %rdi, 
%rbx subq $8, %rsp call vec_length movl %eax, %r12d movq %rbx, %rdi leal -7(%r12), %ebp call get_vec_start movl $1, %r9d movq %rax, %rdi xorl %r8d, %r8d jmp .L548 .L549: movslq %r8d,%rdx addl $8, %r8d movl 4(%rdi,%rdx,4), %esi movl 12(%rdi,%rdx,4), %eax imull (%rdi,%rdx,4), %esi imull 8(%rdi,%rdx,4), %eax movl 20(%rdi,%rdx,4), %ecx imull 16(%rdi,%rdx,4), %ecx imull %eax, %esi movl 28(%rdi,%rdx,4), %eax imull 24(%rdi,%rdx,4), %eax imull %eax, %ecx imull %ecx, %esi imull %esi, %r9d .L548: cmpl %ebp, %r8d jl .L549 cmpl %r12d, %r8d jge .L547 movslq %r8d,%rax subl %r8d, %r12d leaq (%rdi,%rax,4), %rax movl %r12d, %r8d .L543: imull (%rax), %r9d addq $4, %rax decl %r8d jne .L543 .L547: movl %r9d, (%r13) addq $8, %rsp popq %rbx popq %rbp popq %r12 popq %r13 ret unrollv1_combine: pushq %r12 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $32, %rsp call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $1, %edi movl %eax, %esi xorl %edx, %edx .L554: movslq %edx,%rax incl %edx cmpl $3, %edx movl $1, 16(%rsp,%rax,4) jle .L554 movdqa 16(%rsp), %xmm0 movaps %xmm0, (%rsp) jmp .L573 .L575: movl (%rsp), %eax movq (%rbp), %rcx subl $4, %esi imull (%rbp), %eax movq 8(%rbp), %rbx movl %eax, (%rsp) movq (%rsp), %rax movq %rax, %rdx movq %rcx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq (%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, (%rsp) movl 8(%rsp), %eax imull 8(%rbp), %eax addq $16, %rbp movl %eax, 8(%rsp) movq 8(%rsp), %rdx movq %rbx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 8(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 8(%rsp) .L573: cmpl $3, %esi jg .L575 testl %esi, %esi jmp .L574 .L576: imull (%rbp), %edi addq $4, %rbp decl %esi .L574: jne .L576 movdqa (%rsp), %xmm0 xorl %edx, %edx movaps %xmm0, 16(%rsp) .L564: movslq %edx,%rax incl %edx imull 16(%rsp,%rax,4), %edi cmpl $3, %edx jle .L564 movl %edi, (%r12) addq $32, %rsp popq %rbx popq %rbp popq %r12 ret unrollv2_combine: pushq %r12 pxor %xmm0, %xmm0 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $64, %rsp movaps %xmm0, (%rsp) call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $1, %r8d movl %eax, %esi xorl %edx, %edx .L581: movslq %edx,%rax incl %edx cmpl $3, %edx movl $1, 48(%rsp,%rax,4) jle .L581 movdqa 48(%rsp), %xmm0 movaps %xmm0, 16(%rsp) movaps %xmm0, 32(%rsp) jmp .L600 .L602: movl 32(%rsp), %eax movq (%rbp), %rcx subl $8, %esi imull (%rbp), %eax movq 8(%rbp), %rbx movl %eax, 32(%rsp) movq 32(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 16(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 32(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 32(%rsp) movl 40(%rsp), %eax imull 8(%rbp), %eax movl %eax, 40(%rsp) movq 40(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 24(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 40(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 40(%rsp) movl 16(%rsp), %eax imull 16(%rbp), %eax movl %eax, 16(%rsp) movq 16(%rsp), %rax movq %rax, %rdx movq %rcx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 16(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 16(%rsp) movl 24(%rsp), %eax imull 24(%rbp), %eax addq $32, %rbp movl %eax, 24(%rsp) movq 24(%rsp), %rdx movq %rbx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 24(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 24(%rsp) .L600: cmpl $7, %esi jg .L602 testl %esi, %esi jmp .L601 
.L603: imull (%rbp), %r8d addq $4, %rbp decl %esi .L601: jne .L603 movl 32(%rsp), %eax movq 32(%rsp), %rsi imull 16(%rsp), %eax movq 16(%rsp), %rcx movq 40(%rsp), %rdi movq %rsi, %rdx movq 24(%rsp), %rbx sarq $32, %rdx movl %eax, (%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq (%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq %rax, (%rsp) movl 40(%rsp), %eax sarq $32, %rdx imull 24(%rsp), %eax movl %eax, 8(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 8(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax xorl %edx, %edx movq %rax, 8(%rsp) movdqa (%rsp), %xmm0 movaps %xmm0, 48(%rsp) .L591: movslq %edx,%rax incl %edx imull 48(%rsp,%rax,4), %r8d cmpl $3, %edx jle .L591 movl %r8d, (%r12) addq $64, %rsp popq %rbx popq %rbp popq %r12 ret unrollv4_combine: pushq %r12 pxor %xmm0, %xmm0 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx addq $-128, %rsp movaps %xmm0, 32(%rsp) movaps %xmm0, 16(%rsp) movaps %xmm0, (%rsp) call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $1, %r8d movl %eax, %esi xorl %edx, %edx .L608: movslq %edx,%rax incl %edx cmpl $3, %edx movl $1, 112(%rsp,%rax,4) jle .L608 movdqa 112(%rsp), %xmm0 movaps %xmm0, 80(%rsp) movaps %xmm0, 96(%rsp) movaps %xmm0, 48(%rsp) movaps %xmm0, 64(%rsp) jmp .L627 .L629: movl 96(%rsp), %eax movq (%rbp), %rcx imull (%rbp), %eax movq 8(%rbp), %rbx movl %eax, 96(%rsp) movq 96(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 16(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 96(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 96(%rsp) movl 104(%rsp), %eax imull 8(%rbp), %eax movl %eax, 104(%rsp) movq 104(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 24(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 104(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 104(%rsp) movl 80(%rsp), %eax imull 16(%rbp), %eax movl %eax, 80(%rsp) movq 80(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 32(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 80(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 80(%rsp) movl 88(%rsp), %eax imull 24(%rbp), %eax movl %eax, 88(%rsp) movq 88(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 40(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 88(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 88(%rsp) movl 64(%rsp), %eax imull 32(%rbp), %eax movl %eax, 64(%rsp) movq 64(%rsp), %rax movq %rax, %rdx movq %rcx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 64(%rsp), %rax salq $32, %rdx andl $4294967295, %eax subl $16, %esi orq %rdx, %rax movq %rax, 64(%rsp) movl 72(%rsp), %eax imull 40(%rbp), %eax movq 48(%rbp), %rcx movl %eax, 72(%rsp) movq 72(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 56(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 72(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 72(%rsp) movl 48(%rsp), %eax imull 48(%rbp), %eax movl %eax, 48(%rsp) movq 48(%rsp), %rax movq %rax, %rdx movq %rcx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 48(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 48(%rsp) movl 56(%rsp), %eax imull 56(%rbp), %eax addq $64, %rbp movl %eax, 56(%rsp) movq 56(%rsp), %rdx movq %rbx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 56(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 56(%rsp) .L627: cmpl $15, %esi jg .L629 testl %esi, %esi jmp .L628 .L630: imull 
(%rbp), %r8d addq $4, %rbp decl %esi .L628: jne .L630 movl 96(%rsp), %eax movq 96(%rsp), %rsi imull 80(%rsp), %eax movq 80(%rsp), %rcx movq 104(%rsp), %rdi movq %rsi, %rdx movq 88(%rsp), %rbx movq 64(%rsp), %rsi sarq $32, %rdx movl %eax, 32(%rsp) movq %rcx, %rax movq 48(%rsp), %rcx sarq $32, %rax imull %eax, %edx movq 32(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq 72(%rsp), %rdi movq %rax, 32(%rsp) movl 104(%rsp), %eax sarq $32, %rdx imull 88(%rsp), %eax movl %eax, 40(%rsp) movq %rbx, %rax movq 56(%rsp), %rbx sarq $32, %rax imull %eax, %edx movq 40(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq 32(%rsp), %rsi movq %rax, 40(%rsp) movl 64(%rsp), %eax sarq $32, %rdx imull 48(%rsp), %eax movl %eax, 16(%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq 16(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq 40(%rsp), %rdi movq %rax, 16(%rsp) movl 72(%rsp), %eax sarq $32, %rdx imull 56(%rsp), %eax movq 16(%rsp), %rcx movl %eax, 24(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 24(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq %rax, 24(%rsp) movl 32(%rsp), %eax sarq $32, %rdx imull 16(%rsp), %eax movq 24(%rsp), %rbx movl %eax, (%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq (%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq %rax, (%rsp) movl 40(%rsp), %eax sarq $32, %rdx imull 24(%rsp), %eax movl %eax, 8(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 8(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax xorl %edx, %edx movq %rax, 8(%rsp) movdqa (%rsp), %xmm0 movaps %xmm0, 112(%rsp) .L618: movslq %edx,%rax incl %edx imull 112(%rsp,%rax,4), %r8d cmpl $3, %edx jle .L618 movl %r8d, (%r12) subq $-128, %rsp popq %rbx popq %rbp popq %r12 ret unrollv8_combine: pushq %r12 pxor %xmm0, %xmm0 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $256, %rsp movaps %xmm0, 96(%rsp) movaps %xmm0, 80(%rsp) movaps %xmm0, 64(%rsp) movaps %xmm0, 48(%rsp) movaps %xmm0, 32(%rsp) movaps %xmm0, 16(%rsp) movaps %xmm0, (%rsp) call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $1, %r8d movl %eax, %esi xorl %edx, %edx .L635: movslq %edx,%rax incl %edx cmpl $3, %edx movl $1, 240(%rsp,%rax,4) jle .L635 movdqa 240(%rsp), %xmm0 movaps %xmm0, 208(%rsp) movaps %xmm0, 224(%rsp) movaps %xmm0, 176(%rsp) movaps %xmm0, 192(%rsp) movaps %xmm0, 144(%rsp) movaps %xmm0, 160(%rsp) movaps %xmm0, 112(%rsp) movaps %xmm0, 128(%rsp) jmp .L654 .L656: movl 224(%rsp), %eax movq (%rbp), %rcx imull (%rbp), %eax movq 8(%rbp), %rbx movl %eax, 224(%rsp) movq 224(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 16(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 224(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 224(%rsp) movl 232(%rsp), %eax imull 8(%rbp), %eax movl %eax, 232(%rsp) movq 232(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 24(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 232(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 232(%rsp) movl 208(%rsp), %eax imull 16(%rbp), %eax movl %eax, 208(%rsp) movq 208(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 32(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 208(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 208(%rsp) movl 216(%rsp), %eax imull 24(%rbp), %eax movl %eax, 216(%rsp) movq 216(%rsp), %rdx movq %rbx, 
%rax sarq $32, %rax movq 40(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 216(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 216(%rsp) movl 192(%rsp), %eax imull 32(%rbp), %eax movl %eax, 192(%rsp) movq 192(%rsp), %rax movq %rax, %rdx movq %rcx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 192(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 192(%rsp) movl 200(%rsp), %eax imull 40(%rbp), %eax movq 48(%rbp), %rcx movl %eax, 200(%rsp) movq 200(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 56(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 200(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 200(%rsp) movl 176(%rsp), %eax imull 48(%rbp), %eax movl %eax, 176(%rsp) movq 176(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 64(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 176(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 176(%rsp) movl 184(%rsp), %eax imull 56(%rbp), %eax movl %eax, 184(%rsp) movq 184(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 72(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 184(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 184(%rsp) movl 160(%rsp), %eax imull 64(%rbp), %eax movl %eax, 160(%rsp) movq 160(%rsp), %rax movq %rax, %rdx movq %rcx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 160(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 160(%rsp) movl 168(%rsp), %eax imull 72(%rbp), %eax movl %eax, 168(%rsp) movq 168(%rsp), %rdx movq %rbx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 168(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 168(%rsp) movl 144(%rsp), %eax imull 80(%rbp), %eax movq 80(%rbp), %rcx movq 88(%rbp), %rbx movl %eax, 144(%rsp) movq 144(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 96(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 144(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 144(%rsp) movl 152(%rsp), %eax imull 88(%rbp), %eax movl %eax, 152(%rsp) movq 152(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 104(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 152(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 152(%rsp) movl 128(%rsp), %eax imull 96(%rbp), %eax movl %eax, 128(%rsp) movq 128(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 112(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 128(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 128(%rsp) movl 136(%rsp), %eax imull 104(%rbp), %eax movl %eax, 136(%rsp) movq 136(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 120(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 136(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 136(%rsp) movl 112(%rsp), %eax imull 112(%rbp), %eax movl %eax, 112(%rsp) movq 112(%rsp), %rax movq %rax, %rdx movq %rcx, %rax sarq $32, %rdx sarq $32, %rax subl $32, %esi imull %eax, %edx movq 112(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 112(%rsp) movl 120(%rsp), %eax imull 120(%rbp), %eax subq $-128, %rbp movl %eax, 120(%rsp) movq 120(%rsp), %rdx movq %rbx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 120(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 120(%rsp) .L654: cmpl $31, %esi jg .L656 testl %esi, %esi jmp .L655 .L657: imull (%rbp), %r8d addq $4, %rbp decl %esi .L655: jne .L657 movl 224(%rsp), %eax 
movq 224(%rsp), %rsi imull 208(%rsp), %eax movq 208(%rsp), %rcx movq 232(%rsp), %rdi movq %rsi, %rdx movq 216(%rsp), %rbx movq 192(%rsp), %rsi sarq $32, %rdx movl %eax, 96(%rsp) movq %rcx, %rax movq 176(%rsp), %rcx sarq $32, %rax imull %eax, %edx movq 96(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq 200(%rsp), %rdi movq %rax, 96(%rsp) movl 232(%rsp), %eax sarq $32, %rdx imull 216(%rsp), %eax movl %eax, 104(%rsp) movq %rbx, %rax movq 184(%rsp), %rbx sarq $32, %rax imull %eax, %edx movq 104(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq 96(%rsp), %rsi movq %rax, 104(%rsp) movl 192(%rsp), %eax sarq $32, %rdx imull 176(%rsp), %eax movl %eax, 80(%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq 80(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq 104(%rsp), %rdi movq %rax, 80(%rsp) movl 200(%rsp), %eax sarq $32, %rdx imull 184(%rsp), %eax movq 80(%rsp), %rcx movl %eax, 88(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 88(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq %rax, 88(%rsp) movl 96(%rsp), %eax sarq $32, %rdx imull 80(%rsp), %eax movq 88(%rsp), %rbx movl %eax, 64(%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq 64(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq %rax, 64(%rsp) movl 104(%rsp), %eax sarq $32, %rdx imull 88(%rsp), %eax movq 160(%rsp), %rsi movq 144(%rsp), %rcx movq 168(%rsp), %rdi movl %eax, 72(%rsp) movq %rbx, %rax movq 152(%rsp), %rbx sarq $32, %rax imull %eax, %edx movq 72(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq 64(%rsp), %rsi movq %rax, 72(%rsp) movl 160(%rsp), %eax sarq $32, %rdx imull 144(%rsp), %eax movl %eax, 48(%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq 48(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq 72(%rsp), %rdi movq %rax, 48(%rsp) movl 168(%rsp), %eax sarq $32, %rdx imull 152(%rsp), %eax movq 48(%rsp), %rcx movl %eax, 56(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 56(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq %rax, 56(%rsp) movl 64(%rsp), %eax sarq $32, %rdx imull 48(%rsp), %eax movq 56(%rsp), %rbx movl %eax, 32(%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq 32(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq %rax, 32(%rsp) movl 72(%rsp), %eax sarq $32, %rdx imull 56(%rsp), %eax movl %eax, 40(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 40(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 40(%rsp) movl 128(%rsp), %eax imull 112(%rsp), %eax movl %eax, 16(%rsp) movq 128(%rsp), %rsi movq 112(%rsp), %rcx movq 136(%rsp), %rdi movq 120(%rsp), %rbx movq %rsi, %rdx movq 32(%rsp), %rsi movq %rcx, %rax sarq $32, %rdx sarq $32, %rax imull %eax, %edx movq 16(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq 40(%rsp), %rdi movq %rax, 16(%rsp) movl 136(%rsp), %eax sarq $32, %rdx imull 120(%rsp), %eax movq 16(%rsp), %rcx movl %eax, 24(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 24(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq %rax, 24(%rsp) movl 32(%rsp), %eax sarq $32, %rdx imull 16(%rsp), %eax movq 24(%rsp), %rbx movl %eax, (%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq (%rsp), %rax salq $32, %rdx andl 
$4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq %rax, (%rsp) movl 40(%rsp), %eax sarq $32, %rdx imull 24(%rsp), %eax movl %eax, 8(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 8(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax xorl %edx, %edx movq %rax, 8(%rsp) movdqa (%rsp), %xmm0 movaps %xmm0, 240(%rsp) .L645: movslq %edx,%rax incl %edx imull 240(%rsp,%rax,4), %r8d cmpl $3, %edx jle .L645 movl %r8d, (%r12) addq $256, %rsp popq %rbx popq %rbp popq %r12 ret unrollv12_combine: pushq %r12 pxor %xmm0, %xmm0 movq %rsi, %r12 pushq %rbp pushq %rbx movq %rdi, %rbx subq $384, %rsp movaps %xmm0, 160(%rsp) movaps %xmm0, 144(%rsp) movaps %xmm0, 128(%rsp) movaps %xmm0, 112(%rsp) movaps %xmm0, 96(%rsp) movaps %xmm0, 80(%rsp) movaps %xmm0, 64(%rsp) movaps %xmm0, 48(%rsp) movaps %xmm0, 32(%rsp) movaps %xmm0, 16(%rsp) movaps %xmm0, (%rsp) call get_vec_start movq %rbx, %rdi movq %rax, %rbp call vec_length movl $1, %r8d movl %eax, %esi xorl %edx, %edx .L662: movslq %edx,%rax incl %edx cmpl $3, %edx movl $1, 368(%rsp,%rax,4) jle .L662 movdqa 368(%rsp), %xmm0 movaps %xmm0, 336(%rsp) movaps %xmm0, 352(%rsp) movaps %xmm0, 304(%rsp) movaps %xmm0, 320(%rsp) movaps %xmm0, 272(%rsp) movaps %xmm0, 288(%rsp) movaps %xmm0, 240(%rsp) movaps %xmm0, 256(%rsp) movaps %xmm0, 208(%rsp) movaps %xmm0, 224(%rsp) movaps %xmm0, 176(%rsp) movaps %xmm0, 192(%rsp) jmp .L681 .L683: movl 352(%rsp), %eax movq (%rbp), %rcx imull (%rbp), %eax movq 8(%rbp), %rbx movl %eax, 352(%rsp) movq 352(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 16(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 352(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 352(%rsp) movl 360(%rsp), %eax imull 8(%rbp), %eax movl %eax, 360(%rsp) movq 360(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 24(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 360(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 360(%rsp) movl 336(%rsp), %eax imull 16(%rbp), %eax movl %eax, 336(%rsp) movq 336(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 32(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 336(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 336(%rsp) movl 344(%rsp), %eax imull 24(%rbp), %eax movl %eax, 344(%rsp) movq 344(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 40(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 344(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 344(%rsp) movl 320(%rsp), %eax imull 32(%rbp), %eax movl %eax, 320(%rsp) movq 320(%rsp), %rax movq %rax, %rdx movq %rcx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 320(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 320(%rsp) movl 328(%rsp), %eax imull 40(%rbp), %eax movq 48(%rbp), %rcx movl %eax, 328(%rsp) movq 328(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 56(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 328(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 328(%rsp) movl 304(%rsp), %eax imull 48(%rbp), %eax movl %eax, 304(%rsp) movq 304(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 64(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 304(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 304(%rsp) movl 312(%rsp), %eax imull 56(%rbp), %eax movl %eax, 312(%rsp) movq 312(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 72(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 312(%rsp), %rax salq $32, %rdx andl $4294967295, %eax 
orq %rdx, %rax movq %rax, 312(%rsp) movl 288(%rsp), %eax imull 64(%rbp), %eax movl %eax, 288(%rsp) movq 288(%rsp), %rax movq %rax, %rdx movq %rcx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 288(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 288(%rsp) movl 296(%rsp), %eax imull 72(%rbp), %eax movl %eax, 296(%rsp) movq 296(%rsp), %rdx movq %rbx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 296(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 296(%rsp) movl 272(%rsp), %eax imull 80(%rbp), %eax movq 80(%rbp), %rcx movq 88(%rbp), %rbx movl %eax, 272(%rsp) movq 272(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 96(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 272(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 272(%rsp) movl 280(%rsp), %eax imull 88(%rbp), %eax movl %eax, 280(%rsp) movq 280(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 104(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 280(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 280(%rsp) movl 256(%rsp), %eax imull 96(%rbp), %eax movl %eax, 256(%rsp) movq 256(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 112(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 256(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 256(%rsp) movl 264(%rsp), %eax imull 104(%rbp), %eax movl %eax, 264(%rsp) movq 264(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 120(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 264(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 264(%rsp) movl 240(%rsp), %eax imull 112(%rbp), %eax movl %eax, 240(%rsp) movq 240(%rsp), %rax movq %rax, %rdx movq %rcx, %rax sarq $32, %rdx sarq $32, %rax imull %eax, %edx movq 240(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 240(%rsp) movl 248(%rsp), %eax imull 120(%rbp), %eax movq 128(%rbp), %rcx movl %eax, 248(%rsp) movq 248(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 136(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 248(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 248(%rsp) movl 224(%rsp), %eax imull 128(%rbp), %eax movl %eax, 224(%rsp) movq 224(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 144(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 224(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 224(%rsp) movl 232(%rsp), %eax imull 136(%rbp), %eax movl %eax, 232(%rsp) movq 232(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 152(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 232(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 232(%rsp) movl 208(%rsp), %eax imull 144(%rbp), %eax movl %eax, 208(%rsp) movq 208(%rsp), %rax movq %rax, %rdx movq %rcx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 208(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 208(%rsp) movl 216(%rsp), %eax imull 152(%rbp), %eax movl %eax, 216(%rsp) movq 216(%rsp), %rdx movq %rbx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 216(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 216(%rsp) movl 192(%rsp), %eax imull 160(%rbp), %eax movq 160(%rbp), %rcx movq 168(%rbp), %rbx movl %eax, 192(%rsp) movq 192(%rsp), %rax movq %rax, %rdx movq %rcx, %rax movq 176(%rbp), %rcx sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 192(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax 
movq %rax, 192(%rsp) movl 200(%rsp), %eax imull 168(%rbp), %eax movl %eax, 200(%rsp) movq 200(%rsp), %rdx movq %rbx, %rax sarq $32, %rax movq 184(%rbp), %rbx sarq $32, %rdx imull %eax, %edx movq 200(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 200(%rsp) movl 176(%rsp), %eax imull 176(%rbp), %eax movl %eax, 176(%rsp) movq 176(%rsp), %rax movq %rax, %rdx movq %rcx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 176(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 176(%rsp) movl 184(%rsp), %eax imull 184(%rbp), %eax movl %eax, 184(%rsp) movq 184(%rsp), %rdx movq %rbx, %rax sarq $32, %rax sarq $32, %rdx imull %eax, %edx movq 184(%rsp), %rax salq $32, %rdx andl $4294967295, %eax addq $192, %rbp orq %rdx, %rax subl $48, %esi movq %rax, 184(%rsp) .L681: cmpl $47, %esi jg .L683 testl %esi, %esi jmp .L682 .L684: imull (%rbp), %r8d addq $4, %rbp decl %esi .L682: jne .L684 movl 352(%rsp), %eax movq 352(%rsp), %rsi imull 336(%rsp), %eax movq 336(%rsp), %rcx movq 360(%rsp), %rdi movq %rsi, %rdx movq 344(%rsp), %rbx movq 320(%rsp), %rsi sarq $32, %rdx movl %eax, 160(%rsp) movq %rcx, %rax movq 304(%rsp), %rcx sarq $32, %rax imull %eax, %edx movq 160(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq 328(%rsp), %rdi movq %rax, 160(%rsp) movl 360(%rsp), %eax sarq $32, %rdx imull 344(%rsp), %eax movl %eax, 168(%rsp) movq %rbx, %rax movq 312(%rsp), %rbx sarq $32, %rax imull %eax, %edx movq 168(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq 160(%rsp), %rsi movq %rax, 168(%rsp) movl 320(%rsp), %eax sarq $32, %rdx imull 304(%rsp), %eax movl %eax, 144(%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq 144(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq 168(%rsp), %rdi movq %rax, 144(%rsp) movl 328(%rsp), %eax sarq $32, %rdx imull 312(%rsp), %eax movq 144(%rsp), %rcx movl %eax, 152(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 152(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq %rax, 152(%rsp) movl 160(%rsp), %eax sarq $32, %rdx imull 144(%rsp), %eax movq 152(%rsp), %rbx movl %eax, 128(%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq 128(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq %rax, 128(%rsp) movl 168(%rsp), %eax sarq $32, %rdx imull 152(%rsp), %eax movq 288(%rsp), %rsi movq 272(%rsp), %rcx movq 296(%rsp), %rdi movl %eax, 136(%rsp) movq %rbx, %rax movq 280(%rsp), %rbx sarq $32, %rax imull %eax, %edx movq 136(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq 128(%rsp), %rsi movq %rax, 136(%rsp) movl 288(%rsp), %eax sarq $32, %rdx imull 272(%rsp), %eax movl %eax, 112(%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq 112(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq 136(%rsp), %rdi movq %rax, 112(%rsp) movl 296(%rsp), %eax sarq $32, %rdx imull 280(%rsp), %eax movq 112(%rsp), %rcx movl %eax, 120(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 120(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq %rax, 120(%rsp) movl 128(%rsp), %eax sarq $32, %rdx imull 112(%rsp), %eax movq 120(%rsp), %rbx movl %eax, 96(%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq 96(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq %rax, 96(%rsp) movl 136(%rsp), %eax sarq $32, %rdx 
imull 120(%rsp), %eax movl %eax, 104(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 104(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 104(%rsp) movl 256(%rsp), %eax imull 240(%rsp), %eax movl %eax, 80(%rsp) movq 256(%rsp), %rsi movq 240(%rsp), %rcx movq 264(%rsp), %rdi movq 248(%rsp), %rbx movq %rsi, %rdx movq 96(%rsp), %rsi movq %rcx, %rax sarq $32, %rdx sarq $32, %rax imull %eax, %edx movq 80(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq 104(%rsp), %rdi movq %rax, 80(%rsp) movl 264(%rsp), %eax sarq $32, %rdx imull 248(%rsp), %eax movq 80(%rsp), %rcx movl %eax, 88(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 88(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq 224(%rsp), %rsi movq %rax, 88(%rsp) movl 96(%rsp), %eax sarq $32, %rdx imull 80(%rsp), %eax movq 88(%rsp), %rbx movl %eax, 64(%rsp) movq %rcx, %rax movq 208(%rsp), %rcx sarq $32, %rax imull %eax, %edx movq 64(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq 232(%rsp), %rdi movq %rax, 64(%rsp) movl 104(%rsp), %eax sarq $32, %rdx imull 88(%rsp), %eax movl %eax, 72(%rsp) movq %rbx, %rax movq 216(%rsp), %rbx sarq $32, %rax imull %eax, %edx movq 72(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq %rax, 72(%rsp) movl 224(%rsp), %eax imull 208(%rsp), %eax sarq $32, %rdx movl %eax, 48(%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq 48(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq %rax, 48(%rsp) movl 232(%rsp), %eax sarq $32, %rdx imull 216(%rsp), %eax movq 64(%rsp), %rsi movq 48(%rsp), %rcx movq 72(%rsp), %rdi movl %eax, 56(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 56(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq 192(%rsp), %rsi movq %rax, 56(%rsp) movl 64(%rsp), %eax sarq $32, %rdx imull 48(%rsp), %eax movq 56(%rsp), %rbx movl %eax, 32(%rsp) movq %rcx, %rax movq 176(%rsp), %rcx sarq $32, %rax imull %eax, %edx movq 32(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq 200(%rsp), %rdi movq %rax, 32(%rsp) movl 72(%rsp), %eax sarq $32, %rdx imull 56(%rsp), %eax movl %eax, 40(%rsp) movq %rbx, %rax movq 184(%rsp), %rbx sarq $32, %rax imull %eax, %edx movq 40(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rsi, %rdx movq %rax, 40(%rsp) movl 192(%rsp), %eax sarq $32, %rdx imull 176(%rsp), %eax movl %eax, 16(%rsp) movq %rcx, %rax sarq $32, %rax imull %eax, %edx movq 16(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq %rax, 16(%rsp) movl 200(%rsp), %eax sarq $32, %rdx imull 184(%rsp), %eax movl %eax, 24(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 24(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rax, 24(%rsp) movl 32(%rsp), %eax imull 16(%rsp), %eax movl %eax, (%rsp) movq 32(%rsp), %rsi movq 16(%rsp), %rcx movq 40(%rsp), %rdi movq 24(%rsp), %rbx movq %rsi, %rdx movq %rcx, %rax sarq $32, %rdx sarq $32, %rax imull %eax, %edx movq (%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax movq %rdi, %rdx movq %rax, (%rsp) movl 40(%rsp), %eax sarq $32, %rdx imull 24(%rsp), %eax movl %eax, 8(%rsp) movq %rbx, %rax sarq $32, %rax imull %eax, %edx movq 8(%rsp), %rax salq $32, %rdx andl $4294967295, %eax orq %rdx, %rax xorl %edx, %edx movq %rax, 8(%rsp) movdqa (%rsp), %xmm0 movaps %xmm0, 368(%rsp) .L672: movslq 
unrollv2a_combine:                      # 4-int packs, 2 packs per iteration
        pushq %r12
        pxor %xmm0, %xmm0
        movq %rsi, %r12
        pushq %rbp
        pushq %rbx
        movq %rdi, %rbx
        subq $48, %rsp
        movaps %xmm0, (%rsp)
        call get_vec_start
        movq %rbx, %rdi
        movq %rax, %rbp
        call vec_length
        movl $1, %r9d
        movl %eax, %r8d
        xorl %edx, %edx
.L689:                                  # set the pack at 32(%rsp) to all ones
        movslq %edx,%rax
        incl %edx
        cmpl $3, %edx
        movl $1, 32(%rsp,%rax,4)
        jle .L689
        movdqa 32(%rsp), %xmm0
        movaps %xmm0, 16(%rsp)          # accumulator pack lives at 16(%rsp)
        jmp .L708
.L710:                                  # main loop: acc.d[i] *= data[i]*data[4+i]
        movl (%rbp), %eax
        movq 16(%rbp), %rcx
        subl $8, %r8d
        imull 16(%rbp), %eax
        movq (%rbp), %rsi
        movq 24(%rbp), %rbx
        movq 8(%rbp), %rdi
        movq %rsi, %rdx
        sarq $32, %rdx
        movl %eax, (%rsp)
        movq %rcx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq (%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rdi, %rdx
        movq %rax, (%rsp)
        movl 8(%rbp), %eax
        sarq $32, %rdx
        imull 24(%rbp), %eax
        movq (%rsp), %rcx
        addq $32, %rbp
        movl %eax, 8(%rsp)
        movq %rbx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq 8(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rax, 8(%rsp)
        movl 16(%rsp), %eax
        imull (%rsp), %eax
        movq 8(%rsp), %rbx
        movl %eax, 16(%rsp)
        movq 16(%rsp), %rax
        movq %rax, %rdx
        movq %rcx, %rax
        sarq $32, %rax
        sarq $32, %rdx
        imull %eax, %edx
        movq 16(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rax, 16(%rsp)
        movl 24(%rsp), %eax
        imull 8(%rsp), %eax
        movl %eax, 24(%rsp)
        movq 24(%rsp), %rdx
        movq %rbx, %rax
        sarq $32, %rax
        sarq $32, %rdx
        imull %eax, %edx
        movq 24(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rax, 24(%rsp)
.L708:
        cmpl $7, %r8d                   # at least 8 elements left?
        jg .L710
        testl %r8d, %r8d
        jmp .L709
.L711:                                  # scalar cleanup of 0..7 leftovers
        imull (%rbp), %r9d
        addq $4, %rbp
        decl %r8d
.L709:
        jne .L711
        movdqa 16(%rsp), %xmm0
        xorl %edx, %edx
        movaps %xmm0, 32(%rsp)
.L699:                                  # fold the accumulator pack into %r9d
        movslq %edx,%rax
        incl %edx
        imull 32(%rsp,%rax,4), %r9d
        cmpl $3, %edx
        jle .L699
        movl %r9d, (%r12)
        addq $48, %rsp
        popq %rbx
        popq %rbp
        popq %r12
        ret
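/*
 * A C reconstruction of unrollv2a_combine (a sketch: unrollv2a_combine,
 * get_vec_start, and vec_length are symbols from this listing; vec_ptr,
 * data_t, pack_t, and NELT are the assumptions sketched above, and the body
 * is inferred from the assembly rather than taken from the source).  The
 * main loop consumes two packs (8 ints) per trip and multiplies the two
 * loads together before updating the single packed accumulator:
 *
 *     #define NELT 4                       // ints per 16-byte pack
 *     typedef struct vec *vec_ptr;         // opaque vector handle
 *     data_t *get_vec_start(vec_ptr v);
 *     int vec_length(vec_ptr v);
 *
 *     void unrollv2a_combine(vec_ptr v, data_t *dest)
 *     {
 *         data_t *data = get_vec_start(v);
 *         int cnt = vec_length(v);
 *         data_t result = 1;
 *         pack_t acc;
 *
 *         for (int i = 0; i < NELT; i++)   // .L689: acc = {1,1,1,1}
 *             acc.d[i] = 1;
 *         while (cnt > 2 * NELT - 1) {     // .L710: 8 elements per trip
 *             for (int i = 0; i < NELT; i++)
 *                 acc.d[i] *= data[i] * data[NELT + i];
 *             data += 2 * NELT;
 *             cnt  -= 2 * NELT;
 *         }
 *         while (cnt-- > 0)                // .L711: scalar cleanup
 *             result *= *data++;
 *         for (int i = 0; i < NELT; i++)   // .L699: fold acc into result
 *             result *= acc.d[i];
 *         *dest = result;
 *     }
 */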
unrollv4a_combine:                      # 4-int packs, 4 packs per iteration
        pushq %r12
        pxor %xmm0, %xmm0
        movq %rsi, %r12
        pushq %rbp
        pushq %rbx
        movq %rdi, %rbx
        subq $80, %rsp
        movaps %xmm0, 32(%rsp)
        movaps %xmm0, 16(%rsp)
        movaps %xmm0, (%rsp)
        call get_vec_start
        movq %rbx, %rdi
        movq %rax, %rbp
        call vec_length
        movl $1, %r9d
        movl %eax, %r8d
        xorl %edx, %edx
.L716:                                  # set the pack at 64(%rsp) to all ones
        movslq %edx,%rax
        incl %edx
        cmpl $3, %edx
        movl $1, 64(%rsp,%rax,4)
        jle .L716
        movdqa 64(%rsp), %xmm0
        movaps %xmm0, 48(%rsp)          # accumulator pack lives at 48(%rsp)
        jmp .L735
.L737:                                  # main loop: 16 elements per trip
        movl (%rbp), %eax
        movq (%rbp), %rsi
        imull 16(%rbp), %eax
        movq 16(%rbp), %rcx
        movq 8(%rbp), %rdi
        movq %rsi, %rdx
        movq 24(%rbp), %rbx
        movq 32(%rbp), %rsi
        sarq $32, %rdx
        movl %eax, 32(%rsp)
        movq %rcx, %rax
        movq 48(%rbp), %rcx
        sarq $32, %rax
        imull %eax, %edx
        movq 32(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rdi, %rdx
        movq 40(%rbp), %rdi
        movq %rax, 32(%rsp)
        movl 8(%rbp), %eax
        sarq $32, %rdx
        imull 24(%rbp), %eax
        movl %eax, 40(%rsp)
        movq %rbx, %rax
        movq 56(%rbp), %rbx
        sarq $32, %rax
        imull %eax, %edx
        movq 40(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rsi, %rdx
        movq 32(%rsp), %rsi
        movq %rax, 40(%rsp)
        movl 32(%rbp), %eax
        sarq $32, %rdx
        imull 48(%rbp), %eax
        movl %eax, 16(%rsp)
        movq %rcx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq 16(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rdi, %rdx
        movq 40(%rsp), %rdi
        movq %rax, 16(%rsp)
        movl 40(%rbp), %eax
        sarq $32, %rdx
        imull 56(%rbp), %eax
        movq 16(%rsp), %rcx
        movl %eax, 24(%rsp)
        movq %rbx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq 24(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rsi, %rdx
        movq %rax, 24(%rsp)
        movl 32(%rsp), %eax
        sarq $32, %rdx
        imull 16(%rsp), %eax
        movq 24(%rsp), %rbx
        movl %eax, (%rsp)
        movq %rcx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq (%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        addq $64, %rbp
        orq %rdx, %rax
        movq %rdi, %rdx
        subl $16, %r8d
        movq %rax, (%rsp)
        movl 40(%rsp), %eax
        sarq $32, %rdx
        imull 24(%rsp), %eax
        movq (%rsp), %rcx
        movl %eax, 8(%rsp)
        movq %rbx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq 8(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rax, 8(%rsp)
        movl 48(%rsp), %eax
        imull (%rsp), %eax
        movq 8(%rsp), %rbx
        movl %eax, 48(%rsp)
        movq 48(%rsp), %rax
        movq %rax, %rdx
        movq %rcx, %rax
        sarq $32, %rax
        sarq $32, %rdx
        imull %eax, %edx
        movq 48(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rax, 48(%rsp)
        movl 56(%rsp), %eax
        imull 8(%rsp), %eax
        movl %eax, 56(%rsp)
        movq 56(%rsp), %rdx
        movq %rbx, %rax
        sarq $32, %rax
        sarq $32, %rdx
        imull %eax, %edx
        movq 56(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rax, 56(%rsp)
.L735:
        cmpl $15, %r8d                  # at least 16 elements left?
        jg .L737
        testl %r8d, %r8d
        jmp .L736
.L738:                                  # scalar cleanup of 0..15 leftovers
        imull (%rbp), %r9d
        addq $4, %rbp
        decl %r8d
.L736:
        jne .L738
        movdqa 48(%rsp), %xmm0
        xorl %edx, %edx
        movaps %xmm0, 64(%rsp)
.L726:                                  # fold the accumulator pack into %r9d
        movslq %edx,%rax
        incl %edx
        imull 64(%rsp,%rax,4), %r9d
        cmpl $3, %edx
        jle .L726
        movl %r9d, (%r12)
        addq $80, %rsp
        popq %rbx
        popq %rbp
        popq %r12
        ret
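/*
 * unrollv8a_combine below is the same recipe again with 8 packs (32 ints)
 * per main-loop trip: compare the cmpl $15/jg guard and addq $64, %rbp
 * above with cmpl $31/jg and subq $-128, %rbp below.  In both routines the
 * packs loaded in one trip are multiplied together pairwise before the
 * single accumulator update.  A hedged generalization (step is a
 * hypothetical helper; the exact association tree is inferred from the
 * stack temporaries, and for int multiply any order gives the same bits):
 *
 *     static void step(pack_t *acc, const data_t *data, int npacks)
 *     {
 *         for (int i = 0; i < NELT; i++) {
 *             data_t t = 1;
 *             for (int k = 0; k + 1 < npacks; k += 2)   // pair the packs
 *                 t *= data[k*NELT + i] * data[(k+1)*NELT + i];
 *             acc->d[i] *= t;
 *         }
 *     }
 */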
unrollv8a_combine:                      # 4-int packs, 8 packs per iteration
        pushq %r12
        pxor %xmm0, %xmm0
        movq %rsi, %r12
        pushq %rbp
        pushq %rbx
        movq %rdi, %rbx
        subq $144, %rsp
        movaps %xmm0, 96(%rsp)
        movaps %xmm0, 80(%rsp)
        movaps %xmm0, 64(%rsp)
        movaps %xmm0, 48(%rsp)
        movaps %xmm0, 32(%rsp)
        movaps %xmm0, 16(%rsp)
        movaps %xmm0, (%rsp)
        call get_vec_start
        movq %rbx, %rdi
        movq %rax, %rbp
        call vec_length
        movl $1, %r9d
        movl %eax, %r8d
        xorl %edx, %edx
.L743:                                  # set the pack at 128(%rsp) to all ones
        movslq %edx,%rax
        incl %edx
        cmpl $3, %edx
        movl $1, 128(%rsp,%rax,4)
        jle .L743
        movdqa 128(%rsp), %xmm0
        movaps %xmm0, 112(%rsp)         # accumulator pack lives at 112(%rsp)
        jmp .L762
.L764:                                  # main loop: 32 elements per trip
        movl (%rbp), %eax
        movq (%rbp), %rsi
        imull 16(%rbp), %eax
        movq 16(%rbp), %rcx
        movq 8(%rbp), %rdi
        movq %rsi, %rdx
        movq 24(%rbp), %rbx
        movq 32(%rbp), %rsi
        sarq $32, %rdx
        movl %eax, 96(%rsp)
        movq %rcx, %rax
        movq 48(%rbp), %rcx
        sarq $32, %rax
        imull %eax, %edx
        movq 96(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rdi, %rdx
        movq 40(%rbp), %rdi
        movq %rax, 96(%rsp)
        movl 8(%rbp), %eax
        sarq $32, %rdx
        imull 24(%rbp), %eax
        movl %eax, 104(%rsp)
        movq %rbx, %rax
        movq 56(%rbp), %rbx
        sarq $32, %rax
        imull %eax, %edx
        movq 104(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rsi, %rdx
        movq 96(%rsp), %rsi
        movq %rax, 104(%rsp)
        movl 32(%rbp), %eax
        sarq $32, %rdx
        imull 48(%rbp), %eax
        movl %eax, 80(%rsp)
        movq %rcx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq 80(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rdi, %rdx
        movq 104(%rsp), %rdi
        movq %rax, 80(%rsp)
        movl 40(%rbp), %eax
        sarq $32, %rdx
        imull 56(%rbp), %eax
        movq 80(%rsp), %rcx
        movl %eax, 88(%rsp)
        movq %rbx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq 88(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rsi, %rdx
        movq %rax, 88(%rsp)
        movl 96(%rsp), %eax
        sarq $32, %rdx
        imull 80(%rsp), %eax
        movq 88(%rsp), %rbx
        movl %eax, 64(%rsp)
        movq %rcx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq 64(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rdi, %rdx
        movq %rax, 64(%rsp)
        movl 104(%rsp), %eax
        sarq $32, %rdx
        imull 88(%rsp), %eax
        movq 64(%rbp), %rsi
        movq 80(%rbp), %rcx
        movq 72(%rbp), %rdi
        movl %eax, 72(%rsp)
        movq %rbx, %rax
        movq 88(%rbp), %rbx
        sarq $32, %rax
        imull %eax, %edx
        movq 72(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rsi, %rdx
        movq 96(%rbp), %rsi
        movq %rax, 72(%rsp)
        movl 64(%rbp), %eax
        sarq $32, %rdx
        imull 80(%rbp), %eax
        movl %eax, 48(%rsp)
        movq %rcx, %rax
        movq 112(%rbp), %rcx
        sarq $32, %rax
        imull %eax, %edx
        movq 48(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rdi, %rdx
        movq 104(%rbp), %rdi
        movq %rax, 48(%rsp)
        movl 72(%rbp), %eax
        sarq $32, %rdx
        imull 88(%rbp), %eax
        movl %eax, 56(%rsp)
        movq %rbx, %rax
        movq 120(%rbp), %rbx
        sarq $32, %rax
        imull %eax, %edx
        movq 56(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rsi, %rdx
        movq %rax, 56(%rsp)
        movl 96(%rbp), %eax
        sarq $32, %rdx
        imull 112(%rbp), %eax
        movl %eax, 32(%rsp)
        movq %rcx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq 32(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rdi, %rdx
        movq %rax, 32(%rsp)
        movl 104(%rbp), %eax
        sarq $32, %rdx
        imull 120(%rbp), %eax
        movl %eax, 40(%rsp)
        movq %rbx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq 40(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rax, 40(%rsp)
        movl 48(%rsp), %eax
        imull 32(%rsp), %eax
        movl %eax, 16(%rsp)
        movq 48(%rsp), %rsi
        movq 32(%rsp), %rcx
        movq 56(%rsp), %rdi
        movq 40(%rsp), %rbx
        movq %rsi, %rdx
        movq 64(%rsp), %rsi
        movq %rcx, %rax
        sarq $32, %rdx
        sarq $32, %rax
        imull %eax, %edx
        movq 16(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rdi, %rdx
        movq 72(%rsp), %rdi
        movq %rax, 16(%rsp)
        movl 56(%rsp), %eax
        sarq $32, %rdx
        imull 40(%rsp), %eax
        movq 16(%rsp), %rcx
        movl %eax, 24(%rsp)
        movq %rbx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq 24(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rsi, %rdx
        movq %rax, 24(%rsp)
        movl 64(%rsp), %eax
        sarq $32, %rdx
        imull 16(%rsp), %eax
        movq 24(%rsp), %rbx
        movl %eax, (%rsp)
        movq %rcx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq (%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rdi, %rdx
        movq %rax, (%rsp)
        movl 72(%rsp), %eax
        sarq $32, %rdx
        imull 24(%rsp), %eax
        movq (%rsp), %rcx
        movl %eax, 8(%rsp)
        movq %rbx, %rax
        sarq $32, %rax
        imull %eax, %edx
        movq 8(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rax, 8(%rsp)
        movl 112(%rsp), %eax
        imull (%rsp), %eax
        movq 8(%rsp), %rbx
        movl %eax, 112(%rsp)
        movq 112(%rsp), %rax
        movq %rax, %rdx
        movq %rcx, %rax
        sarq $32, %rdx
        sarq $32, %rax
        subq $-128, %rbp
        imull %eax, %edx
        movq 112(%rsp), %rax
        subl $32, %r8d
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rax, 112(%rsp)
        movl 120(%rsp), %eax
        imull 8(%rsp), %eax
        movl %eax, 120(%rsp)
        movq 120(%rsp), %rdx
        movq %rbx, %rax
        sarq $32, %rax
        sarq $32, %rdx
        imull %eax, %edx
        movq 120(%rsp), %rax
        salq $32, %rdx
        andl $4294967295, %eax
        orq %rdx, %rax
        movq %rax, 120(%rsp)
.L762:
        cmpl $31, %r8d                  # at least 32 elements left?
        jg .L764
        testl %r8d, %r8d
        jmp .L763
.L765:                                  # scalar cleanup of 0..31 leftovers
        imull (%rbp), %r9d
        addq $4, %rbp
        decl %r8d
.L763:
        jne .L765
        movdqa 112(%rsp), %xmm0
        xorl %edx, %edx
        movaps %xmm0, 128(%rsp)
.L753:                                  # fold the accumulator pack into %r9d
        movslq %edx,%rax
        incl %edx
        imull 128(%rsp,%rax,4), %r9d
        cmpl $3, %edx
        jle .L753
        movl %r9d, (%r12)
        addq $144, %rsp
        popq %rbx
        popq %rbp
        popq %r12
        ret
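/*
 * register_combiners below is pure glue: one add_combiner call per variant,
 * always passing the variant in %edi, combine1 in %esi as the reference
 * implementation, and the matching _descr string in %edx.  A C sketch (the
 * combiner_t type and the add_combiner prototype are assumptions inferred
 * from this calling pattern, not taken from a header):
 *
 *     typedef void (*combiner_t)(vec_ptr, data_t *);
 *     void add_combiner(combiner_t fn, combiner_t ref, char *descr);
 *
 *     void register_combiners(void)
 *     {
 *         add_combiner(combine1, combine1, combine1_descr);
 *         add_combiner(combine2, combine1, combine2_descr);
 *         // ... one call per variant, in the order of the code below ...
 *         add_combiner(unrollv8a_combine, combine1, unrollv8a_descr);
 *     }
 */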
register_combiners:
        subq $8, %rsp
        movl $combine1_descr, %edx
        movl $combine1, %esi
        movl $combine1, %edi
        call add_combiner
        movl $combine2_descr, %edx
        movl $combine1, %esi
        movl $combine2, %edi
        call add_combiner
        movl $combine3_descr, %edx
        movl $combine1, %esi
        movl $combine3, %edi
        call add_combiner
        movl $combine4_descr, %edx
        movl $combine1, %esi
        movl $combine4, %edi
        call add_combiner
        movl $combine4p_descr, %edx
        movl $combine1, %esi
        movl $combine4p, %edi
        call add_combiner
        movl $unroll2a_descr, %edx
        movl $combine1, %esi
        movl $unroll2a_combine, %edi
        call add_combiner
        movl $combine5p_descr, %edx
        movl $combine1, %esi
        movl $combine5p, %edi
        call add_combiner
        movl $unroll3aw_descr, %edx
        movl $combine1, %esi
        movl $unroll3aw_combine, %edi
        call add_combiner
        movl $unroll4a_descr, %edx
        movl $combine1, %esi
        movl $unroll4a_combine, %edi
        call add_combiner
        movl $unroll8a_descr, %edx
        movl $combine1, %esi
        movl $unroll8a_combine, %edi
        call add_combiner
        movl $unroll16a_descr, %edx
        movl $combine1, %esi
        movl $unroll16a_combine, %edi
        call add_combiner
        movl $unroll2_descr, %edx
        movl $combine1, %esi
        movl $unroll2_combine, %edi
        call add_combiner
        movl $unroll3_descr, %edx
        movl $combine1, %esi
        movl $unroll3_combine, %edi
        call add_combiner
        movl $unroll4_descr, %edx
        movl $combine1, %esi
        movl $unroll4_combine, %edi
        call add_combiner
        movl $unroll8_descr, %edx
        movl $combine1, %esi
        movl $unroll8_combine, %edi
        call add_combiner
        movl $unroll16_descr, %edx
        movl $combine1, %esi
        movl $unroll16_combine, %edi
        call add_combiner
        movl $combine6_descr, %edx
        movl $combine1, %esi
        movl $combine6, %edi
        call add_combiner
        movl $unroll4x2a_descr, %edx
        movl $combine1, %esi
        movl $unroll4x2a_combine, %edi
        call add_combiner
        movl $unroll8x2a_descr, %edx
        movl $combine1, %esi
        movl $unroll8x2a_combine, %edi
        call add_combiner
        movl $unroll3x3a_descr, %edx
        movl $combine1, %esi
        movl $unroll3x3a_combine, %edi
        call add_combiner
        movl $unroll4x4a_descr, %edx
        movl $combine1, %esi
        movl $unroll4x4a_combine, %edi
        call add_combiner
        movl $unroll8x4a_descr, %edx
        movl $combine1, %esi
        movl $unroll8x4a_combine, %edi
        call add_combiner
        movl $unroll6x6a_descr, %edx
        movl $combine1, %esi
        movl $unroll6x6a_combine, %edi
        call add_combiner
        movl $unroll8x8a_descr, %edx
        movl $combine1, %esi
        movl $unroll8x8a_combine, %edi
        call add_combiner
        movl $unroll10x10a_descr, %edx
        movl $combine1, %esi
        movl $unroll10x10a_combine, %edi
        call add_combiner
        movl $unroll12x6a_descr, %edx
        movl $combine1, %esi
        movl $unroll12x6a_combine, %edi
        call add_combiner
        movl $unroll12x12a_descr, %edx
        movl $combine1, %esi
        movl $unroll12x12a_combine, %edi
        call add_combiner
        movl $unroll8x2_descr, %edx
        movl $combine1, %esi
        movl $unroll8x2_combine, %edi
        call add_combiner
        movl $unroll8x4_descr, %edx
        movl $combine1, %esi
        movl $unroll8x4_combine, %edi
        call add_combiner
        movl $unroll8x8_descr, %edx
        movl $combine1, %esi
        movl $unroll8x8_combine, %edi
        call add_combiner
        movl $unroll9x3_descr, %edx
        movl $combine1, %esi
        movl $unroll9x3_combine, %edi
        call add_combiner
        movl $unrollx2as_descr, %edx
        movl $combine1, %esi
        movl $unrollx2as_combine, %edi
        call add_combiner
        movl $unroll2aa_descr, %edx
        movl $combine1, %esi
        movl $unroll2aa_combine, %edi
        call add_combiner
        movl $unroll3aa_descr, %edx
        movl $combine1, %esi
        movl $unroll3aa_combine, %edi
        call add_combiner
        movl $unroll4aa_descr, %edx
        movl $combine1, %esi
        movl $unroll4aa_combine, %edi
        call add_combiner
        movl $unroll6aa_descr, %edx
        movl $combine1, %esi
        movl $unroll6aa_combine, %edi
        call add_combiner
        movl $unroll8aa_descr, %edx
        movl $combine1, %esi
        movl $unroll8aa_combine, %edi
        call add_combiner
        movl $unrollv1_descr, %edx
        movl $combine1, %esi
        movl $unrollv1_combine, %edi
        call add_combiner
        movl $unrollv2_descr, %edx
        movl $combine1, %esi
        movl $unrollv2_combine, %edi
        call add_combiner
        movl $unrollv4_descr, %edx
        movl $combine1, %esi
        movl $unrollv4_combine, %edi
        call add_combiner
        movl $unrollv8_descr, %edx
        movl $combine1, %esi
        movl $unrollv8_combine, %edi
        call add_combiner
        movl $unrollv12_descr, %edx
        movl $combine1, %esi
        movl $unrollv12_combine, %edi
        call add_combiner
        movl $unrollv2a_descr, %edx
        movl $combine1, %esi
        movl $unrollv2a_combine, %edi
        call add_combiner
        movl $unrollv4a_descr, %edx
        movl $combine1, %esi
        movl $unrollv4a_combine, %edi
        call add_combiner
        movl $unrollv8a_descr, %edx
        movl $combine1, %esi
        movl $unrollv8a_combine, %edi
        addq $8, %rsp
        jmp add_combiner                # tail call: the last registration returns for us
.Lframe1: