/* Source listing: f-stack/freebsd/crypto/openssl/amd64/aesni-sha256-x86_64.S (4438 lines, 89 KiB; x86-64 GNU assembler, AT&T syntax). */

/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from aesni-sha256-x86_64.pl. */
.text
/*
 * aesni_cbc_sha256_enc -- run-time dispatcher.
 *
 * In:   %rdi  first argument; zero selects "probe" mode.
 *       All other argument registers and the stack are left untouched so
 *       that the selected implementation sees the caller's arguments.
 * Out:  probe mode (%rdi == 0): returns with %eax = 1.
 *       otherwise: tail-jumps to one of the implementations below, or
 *       traps with ud2 when none of the tested capability bits is set.
 * Uses: %eax, %r10, %r11, flags.
 *
 * Capability words are read from OPENSSL_ia32cap_P.  The meaning given to
 * each bit below is inferred from the branch targets; confirm against the
 * OPENSSL_ia32cap documentation.
 */
.globl	aesni_cbc_sha256_enc
.type	aesni_cbc_sha256_enc,@function
.align	16
aesni_cbc_sha256_enc:
.cfi_startproc
	movl	$1,%eax				/* probe-mode return value */
	leaq	OPENSSL_ia32cap_P(%rip),%r11
	testq	%rdi,%rdi			/* first argument zero? */
	je	.Lprobe
	movq	4(%r11),%r10			/* r10 = capability dwords 1 and 2 */
	movl	0(%r11),%eax			/* eax = capability dword 0 */
	btq	$61,%r10			/* bit 29 of dword 2 */
	jc	aesni_cbc_sha256_enc_shaext
	movq	%r10,%r11
	shrq	$32,%r11			/* r11d = capability dword 2 */
	testl	$0x800,%r10d			/* bit 11 of dword 1 */
	jnz	aesni_cbc_sha256_enc_xop
	andl	$0x128,%r11d			/* bits 3, 5 and 8 of dword 2 ... */
	cmpl	$0x128,%r11d			/* ... must all be set */
	je	aesni_cbc_sha256_enc_avx2
	andl	$0x10000000,%r10d		/* bit 28 of dword 1 */
	jnz	aesni_cbc_sha256_enc_avx
	ud2					/* no usable implementation */
	/* The four instructions below follow ud2 and are never reached. */
	xorl	%eax,%eax
	testq	%rdi,%rdi
	je	.Lprobe
	ud2
.Lprobe:
	.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	aesni_cbc_sha256_enc,.-aesni_cbc_sha256_enc
/*
 * K256 -- constant pool shared by the AES-CBC + SHA-256 code in this file.
 *
 * Kept in .rodata rather than .text: it is pure data and is only ever
 * addressed RIP-relative, so it does not need to live in an executable
 * section.  NOTE(review): this file is generated; the same change should
 * be carried in the generator so it survives regeneration.
 *
 * Layout (byte offsets from K256):
 *     0..511  the 64 SHA-256 round constants, four per 16-byte row, with
 *             every row emitted twice; consecutive groups of four are
 *             therefore 32 bytes apart (the code reads 0/32/64/96(%rbp)).
 *   512..543  vpshufb mask that reverses the bytes of each 32-bit word
 *             (emitted twice).
 *   544..623  AES result-selection masks: 32 zero bytes, 16 bytes of
 *             0xff, 32 zero bytes.  The XOP/AVX prologues read three
 *             16-byte masks starting at K256+544+8*(n-9), where n is the
 *             dword at offset 240 of the AES key structure.
 *   624..     NUL-terminated ASCII identification string
 *             ("AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by ...").
 */
.section .rodata
.align 64
.type K256,@object
K256:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
/* offset 512: per-dword byte-reversal mask for vpshufb */
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
/* offset 544: AES result-selection masks (see layout above) */
.long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1
.long 0,0,0,0, 0,0,0,0
/* identification string, NUL-terminated */
.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
/* back to the code section for the functions that follow */
.text
.align 64
/*
 * aesni_cbc_sha256_enc_xop -- AES-CBC encryption interleaved with SHA-256,
 * XOP variant (reached from the dispatcher; uses hand-encoded XOP rotates).
 *
 * Arguments as used by the code (System V AMD64 register order):
 *   %rdi      pointer to the data that is AES-encrypted (16 bytes at a time)
 *   %rsi      pointer to the ciphertext output
 *   %rdx      number of 64-byte blocks (shifted left by 6 below)
 *   %rcx      AES key structure: round keys from offset 0, a dword at
 *             offset 240 that selects the key-size masks
 *   %r8       pointer to the 16-byte IV (read here, written in the epilogue)
 *   %r9       pointer to eight 32-bit hash state words
 *   8(%rsp)   seventh argument: pointer to the data that is hashed
 *
 * Stack frame after the 64-byte alignment below:
 *     0..63(%rsp)   W[t]+K[t] for the next 16 rounds
 *   64+0 (%rsp)     saved AES input cursor
 *   64+8 (%rsp)     output pointer minus input pointer
 *   64+16(%rsp)     end of input (input pointer + 64*blocks)
 *   64+32(%rsp)     IV pointer
 *   64+40(%rsp)     hash state pointer
 *   64+48(%rsp)     seventh argument minus input pointer
 *   120  (%rsp)     value of %rsp at function entry
 *
 * NOTE(review): the block count is not tested before the first iteration;
 * the loop condition is only evaluated at the bottom of .Lloop_xop.
 */
.type aesni_cbc_sha256_enc_xop,@function
.align 64
aesni_cbc_sha256_enc_xop:
.cfi_startproc
.Lxop_shortcut:
/* fetch the stack-passed argument before %rsp moves */
movq 8(%rsp),%r10
/* keep the entry %rsp; callee-saved registers are restored relative to it */
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
/* reserve the frame and align it to 64 bytes (vmovdqa is used on it) */
subq $128,%rsp
andq $-64,%rsp
/* rdx = byte length; rsi and r10 become offsets relative to the input pointer */
shlq $6,%rdx
subq %rdi,%rsi
subq %rdi,%r10
/* rdx = end-of-input pointer */
addq %rdi,%rdx
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
movq %r8,64+32(%rsp)
movq %r9,64+40(%rsp)
movq %r10,64+48(%rsp)
movq %rax,120(%rsp)
/* DW_CFA_def_cfa_expression: CFA = *(%rsp + 120) + 8 */
.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
.Lprologue_xop:
vzeroall
/* r12 = AES input cursor */
movq %rdi,%r12
/* bias the key pointer by 128; round keys are then addressed as N-128(%rdi) */
leaq 128(%rcx),%rdi
/* r13 = AES result-selection mask table inside K256 */
leaq K256+544(%rip),%r13
/* r14d = dword at offset 240 of the key structure (presumably the round count; confirm) */
movl 240-128(%rdi),%r14d
movq %r9,%r15
movq %r10,%rsi
/* xmm8 = IV; it later holds the previous ciphertext block */
vmovdqu (%r8),%xmm8
/* mask index; vmovdqa below needs 8*(n-9) to be a multiple of 16 (presumably n is 9, 11 or 13; confirm) */
subq $9,%r14
/* load the eight hash state words */
movl 0(%r15),%eax
movl 4(%r15),%ebx
movl 8(%r15),%ecx
movl 12(%r15),%edx
movl 16(%r15),%r8d
movl 20(%r15),%r9d
movl 24(%r15),%r10d
movl 28(%r15),%r11d
/* xmm14/xmm13/xmm12 mask the vaesenclast results taken with the round keys at key+224/+192/+160 */
vmovdqa 0(%r13,%r14,8),%xmm14
vmovdqa 16(%r13,%r14,8),%xmm13
vmovdqa 32(%r13,%r14,8),%xmm12
/* xmm10 = round key 0 */
vmovdqu 0-128(%rdi),%xmm10
jmp .Lloop_xop
/*
 * .Lloop_xop -- top of the per-64-byte-block loop.
 * Loads the 64 bytes to be hashed from (%rsi,%r12), byte-reverses each
 * 32-bit word, adds the first 16 round constants and parks W[t]+K[t] in
 * 0..63(%rsp).  xmm0..xmm3 keep the 16 message words for the schedule.
 */
.align 16
.Lloop_xop:
/* xmm7 = per-dword byte-reversal mask */
vmovdqa K256+512(%rip),%xmm7
vmovdqu 0(%rsi,%r12,1),%xmm0
vmovdqu 16(%rsi,%r12,1),%xmm1
vmovdqu 32(%rsi,%r12,1),%xmm2
vmovdqu 48(%rsi,%r12,1),%xmm3
vpshufb %xmm7,%xmm0,%xmm0
/* rbp = round-constant cursor */
leaq K256(%rip),%rbp
vpshufb %xmm7,%xmm1,%xmm1
vpshufb %xmm7,%xmm2,%xmm2
/* constant rows are duplicated in K256, hence the 32-byte stride */
vpaddd 0(%rbp),%xmm0,%xmm4
vpshufb %xmm7,%xmm3,%xmm3
vpaddd 32(%rbp),%xmm1,%xmm5
vpaddd 64(%rbp),%xmm2,%xmm6
vpaddd 96(%rbp),%xmm3,%xmm7
vmovdqa %xmm4,0(%rsp)
/* r14d carries the first working variable into the rounds */
movl %eax,%r14d
vmovdqa %xmm5,16(%rsp)
/* esi = b ^ c, consumed by the first round's Maj computation */
movl %ebx,%esi
vmovdqa %xmm6,32(%rsp)
xorl %ecx,%esi
vmovdqa %xmm7,48(%rsp)
/* r13d = copy of the fifth working variable for the first Sigma1 */
movl %r8d,%r13d
jmp .Lxop_00_47
/*
 * .Lxop_00_47 -- 16 SHA-256 rounds with message scheduling, interleaved
 * with the AES-CBC encryption of one 16-byte block.  Executed three times
 * per 64-byte block (rounds 0..47); the test at the bottom ends the loop.
 *
 * Scalar side: the eight working variables live in
 * eax,ebx,ecx,edx,r8d,r9d,r10d,r11d and change roles every round.  Each
 * round adds one W[t]+K[t] dword from 0..60(%rsp).  r13d builds
 * ror6^ror11^ror25 of the "e" variable as ((e ror 14 ^ e) ror 5 ^ e) ror 6,
 * r14d builds ror2^ror13^ror22 of the "a" variable as
 * ((a ror 9 ^ a) ror 11 ^ a) ror 2, r12d holds ((f^g)&e)^g and esi/r15d
 * alternate as the Maj temporaries.  The new "a" is left in r14d and is
 * copied into its register at the start of the next round.
 *
 * Vector side: xmm0..xmm3 hold the 16 most recent message words; each
 * group of four rounds extends one of them by four words and stores the
 * result plus round constants back to the stack.  The ".byte 143,232,120,
 * 194,modrm,imm" sequences are hand-encoded XOP vprotd (rotate each dword
 * left by imm) instructions.
 *
 * AES side: xmm9 is the block being encrypted, xmm10 the current round
 * key, xmm8 the previous ciphertext block (CBC chaining value).  A
 * vaesenclast is issued with the round keys at key+160, key+192 and
 * key+224; the masks xmm12/xmm13/xmm14 keep exactly one of the three
 * results, selected by the prologue from the key structure.
 */
.align 16
.Lxop_00_47:
/* rbp += 128 (next 16 round constants); -128 fits an 8-bit immediate */
subq $-32*4,%rbp
/* xmm9 = next 16-byte AES input block */
vmovdqu (%r12),%xmm9
/* r12 is scratch inside the rounds, so park the input cursor */
movq %r12,64+0(%rsp)
/* ---- rounds t..t+3: schedule xmm0 ---- */
/* xmm4 = words 1..4, xmm7 = words 9..12 of the 16-word window */
vpalignr $4,%xmm0,%xmm1,%xmm4
rorl $14,%r13d
movl %r14d,%eax
vpalignr $4,%xmm2,%xmm3,%xmm7
movl %r9d,%r12d
xorl %r8d,%r13d
/* vprotd $14,%xmm4,%xmm5 */
.byte 143,232,120,194,236,14
rorl $9,%r14d
xorl %r10d,%r12d
vpsrld $3,%xmm4,%xmm4
rorl $5,%r13d
xorl %eax,%r14d
vpaddd %xmm7,%xmm0,%xmm0
andl %r8d,%r12d
/* AES: xor in round key 0, then fetch the round key at key+16 */
vpxor %xmm10,%xmm9,%xmm9
vmovdqu 16-128(%rdi),%xmm10
xorl %r8d,%r13d
addl 0(%rsp),%r11d
movl %eax,%r15d
/* vprotd $11,%xmm5,%xmm6 */
.byte 143,232,120,194,245,11
rorl $11,%r14d
xorl %r10d,%r12d
vpxor %xmm5,%xmm4,%xmm4
xorl %ebx,%r15d
rorl $6,%r13d
addl %r12d,%r11d
andl %r15d,%esi
/* vprotd $13,%xmm3,%xmm7 */
.byte 143,232,120,194,251,13
xorl %eax,%r14d
addl %r13d,%r11d
/* xmm4 = (x>>3) ^ rol14(x) ^ rol25(x) of words 1..4 */
vpxor %xmm6,%xmm4,%xmm4
xorl %ebx,%esi
addl %r11d,%edx
vpsrld $10,%xmm3,%xmm6
rorl $2,%r14d
addl %esi,%r11d
vpaddd %xmm4,%xmm0,%xmm0
movl %edx,%r13d
addl %r11d,%r14d
/* vprotd $2,%xmm7,%xmm5 */
.byte 143,232,120,194,239,2
rorl $14,%r13d
movl %r14d,%r11d
vpxor %xmm6,%xmm7,%xmm7
movl %r8d,%r12d
xorl %edx,%r13d
rorl $9,%r14d
xorl %r9d,%r12d
/* xmm7 = (x>>10) ^ rol13(x) ^ rol15(x) of xmm3 */
vpxor %xmm5,%xmm7,%xmm7
rorl $5,%r13d
xorl %r11d,%r14d
andl %edx,%r12d
/* AES-CBC: xor in the previous ciphertext block (or the IV) */
vpxor %xmm8,%xmm9,%xmm9
xorl %edx,%r13d
/* move the two high dwords down: they feed the two low new words */
vpsrldq $8,%xmm7,%xmm7
addl 4(%rsp),%r10d
movl %r11d,%esi
rorl $11,%r14d
xorl %r9d,%r12d
vpaddd %xmm7,%xmm0,%xmm0
xorl %eax,%esi
rorl $6,%r13d
addl %r12d,%r10d
andl %esi,%r15d
/* vprotd $13,%xmm0,%xmm7 */
.byte 143,232,120,194,248,13
xorl %r11d,%r14d
addl %r13d,%r10d
vpsrld $10,%xmm0,%xmm6
xorl %eax,%r15d
addl %r10d,%ecx
/* vprotd $2,%xmm7,%xmm5 */
.byte 143,232,120,194,239,2
rorl $2,%r14d
addl %r15d,%r10d
vpxor %xmm6,%xmm7,%xmm7
movl %ecx,%r13d
addl %r10d,%r14d
rorl $14,%r13d
movl %r14d,%r10d
vpxor %xmm5,%xmm7,%xmm7
movl %edx,%r12d
xorl %ecx,%r13d
rorl $9,%r14d
xorl %r8d,%r12d
/* move the two low dwords up: they feed the two high new words */
vpslldq $8,%xmm7,%xmm7
rorl $5,%r13d
xorl %r10d,%r14d
andl %ecx,%r12d
/* AES round with the key at key+16; fetch key+32 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 32-128(%rdi),%xmm10
xorl %ecx,%r13d
vpaddd %xmm7,%xmm0,%xmm0
addl 8(%rsp),%r9d
movl %r10d,%r15d
rorl $11,%r14d
xorl %r8d,%r12d
/* xmm6 = four new words + round constants, stored after this group */
vpaddd 0(%rbp),%xmm0,%xmm6
xorl %r11d,%r15d
rorl $6,%r13d
addl %r12d,%r9d
andl %r15d,%esi
xorl %r10d,%r14d
addl %r13d,%r9d
xorl %r11d,%esi
addl %r9d,%ebx
rorl $2,%r14d
addl %esi,%r9d
movl %ebx,%r13d
addl %r9d,%r14d
rorl $14,%r13d
movl %r14d,%r9d
movl %ecx,%r12d
xorl %ebx,%r13d
rorl $9,%r14d
xorl %edx,%r12d
rorl $5,%r13d
xorl %r9d,%r14d
andl %ebx,%r12d
/* AES round with the key at key+32; fetch key+48 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 48-128(%rdi),%xmm10
xorl %ebx,%r13d
addl 12(%rsp),%r8d
movl %r9d,%esi
rorl $11,%r14d
xorl %edx,%r12d
xorl %r10d,%esi
rorl $6,%r13d
addl %r12d,%r8d
andl %esi,%r15d
xorl %r9d,%r14d
addl %r13d,%r8d
xorl %r10d,%r15d
addl %r8d,%eax
rorl $2,%r14d
addl %r15d,%r8d
movl %eax,%r13d
addl %r8d,%r14d
/* ---- rounds t+4..t+7: store the words just scheduled, then schedule xmm1
   (same vector sequence as above with the xmm0..xmm3 roles rotated) ---- */
vmovdqa %xmm6,0(%rsp)
vpalignr $4,%xmm1,%xmm2,%xmm4
rorl $14,%r13d
movl %r14d,%r8d
vpalignr $4,%xmm3,%xmm0,%xmm7
movl %ebx,%r12d
xorl %eax,%r13d
.byte 143,232,120,194,236,14
rorl $9,%r14d
xorl %ecx,%r12d
vpsrld $3,%xmm4,%xmm4
rorl $5,%r13d
xorl %r8d,%r14d
vpaddd %xmm7,%xmm1,%xmm1
andl %eax,%r12d
/* AES round with the key at key+48; fetch key+64 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 64-128(%rdi),%xmm10
xorl %eax,%r13d
addl 16(%rsp),%edx
movl %r8d,%r15d
.byte 143,232,120,194,245,11
rorl $11,%r14d
xorl %ecx,%r12d
vpxor %xmm5,%xmm4,%xmm4
xorl %r9d,%r15d
rorl $6,%r13d
addl %r12d,%edx
andl %r15d,%esi
.byte 143,232,120,194,248,13
xorl %r8d,%r14d
addl %r13d,%edx
vpxor %xmm6,%xmm4,%xmm4
xorl %r9d,%esi
addl %edx,%r11d
vpsrld $10,%xmm0,%xmm6
rorl $2,%r14d
addl %esi,%edx
vpaddd %xmm4,%xmm1,%xmm1
movl %r11d,%r13d
addl %edx,%r14d
.byte 143,232,120,194,239,2
rorl $14,%r13d
movl %r14d,%edx
vpxor %xmm6,%xmm7,%xmm7
movl %eax,%r12d
xorl %r11d,%r13d
rorl $9,%r14d
xorl %ebx,%r12d
vpxor %xmm5,%xmm7,%xmm7
rorl $5,%r13d
xorl %edx,%r14d
andl %r11d,%r12d
/* AES round with the key at key+64; fetch key+80 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 80-128(%rdi),%xmm10
xorl %r11d,%r13d
vpsrldq $8,%xmm7,%xmm7
addl 20(%rsp),%ecx
movl %edx,%esi
rorl $11,%r14d
xorl %ebx,%r12d
vpaddd %xmm7,%xmm1,%xmm1
xorl %r8d,%esi
rorl $6,%r13d
addl %r12d,%ecx
andl %esi,%r15d
.byte 143,232,120,194,249,13
xorl %edx,%r14d
addl %r13d,%ecx
vpsrld $10,%xmm1,%xmm6
xorl %r8d,%r15d
addl %ecx,%r10d
.byte 143,232,120,194,239,2
rorl $2,%r14d
addl %r15d,%ecx
vpxor %xmm6,%xmm7,%xmm7
movl %r10d,%r13d
addl %ecx,%r14d
rorl $14,%r13d
movl %r14d,%ecx
vpxor %xmm5,%xmm7,%xmm7
movl %r11d,%r12d
xorl %r10d,%r13d
rorl $9,%r14d
xorl %eax,%r12d
vpslldq $8,%xmm7,%xmm7
rorl $5,%r13d
xorl %ecx,%r14d
andl %r10d,%r12d
/* AES round with the key at key+80; fetch key+96 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 96-128(%rdi),%xmm10
xorl %r10d,%r13d
vpaddd %xmm7,%xmm1,%xmm1
addl 24(%rsp),%ebx
movl %ecx,%r15d
rorl $11,%r14d
xorl %eax,%r12d
vpaddd 32(%rbp),%xmm1,%xmm6
xorl %edx,%r15d
rorl $6,%r13d
addl %r12d,%ebx
andl %r15d,%esi
xorl %ecx,%r14d
addl %r13d,%ebx
xorl %edx,%esi
addl %ebx,%r9d
rorl $2,%r14d
addl %esi,%ebx
movl %r9d,%r13d
addl %ebx,%r14d
rorl $14,%r13d
movl %r14d,%ebx
movl %r10d,%r12d
xorl %r9d,%r13d
rorl $9,%r14d
xorl %r11d,%r12d
rorl $5,%r13d
xorl %ebx,%r14d
andl %r9d,%r12d
/* AES round with the key at key+96; fetch key+112 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 112-128(%rdi),%xmm10
xorl %r9d,%r13d
addl 28(%rsp),%eax
movl %ebx,%esi
rorl $11,%r14d
xorl %r11d,%r12d
xorl %ecx,%esi
rorl $6,%r13d
addl %r12d,%eax
andl %esi,%r15d
xorl %ebx,%r14d
addl %r13d,%eax
xorl %ecx,%r15d
addl %eax,%r8d
rorl $2,%r14d
addl %r15d,%eax
movl %r8d,%r13d
addl %eax,%r14d
/* ---- rounds t+8..t+11: store, then schedule xmm2 ---- */
vmovdqa %xmm6,16(%rsp)
vpalignr $4,%xmm2,%xmm3,%xmm4
rorl $14,%r13d
movl %r14d,%eax
vpalignr $4,%xmm0,%xmm1,%xmm7
movl %r9d,%r12d
xorl %r8d,%r13d
.byte 143,232,120,194,236,14
rorl $9,%r14d
xorl %r10d,%r12d
vpsrld $3,%xmm4,%xmm4
rorl $5,%r13d
xorl %eax,%r14d
vpaddd %xmm7,%xmm2,%xmm2
andl %r8d,%r12d
/* AES round with the key at key+112; fetch key+128 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 128-128(%rdi),%xmm10
xorl %r8d,%r13d
addl 32(%rsp),%r11d
movl %eax,%r15d
.byte 143,232,120,194,245,11
rorl $11,%r14d
xorl %r10d,%r12d
vpxor %xmm5,%xmm4,%xmm4
xorl %ebx,%r15d
rorl $6,%r13d
addl %r12d,%r11d
andl %r15d,%esi
.byte 143,232,120,194,249,13
xorl %eax,%r14d
addl %r13d,%r11d
vpxor %xmm6,%xmm4,%xmm4
xorl %ebx,%esi
addl %r11d,%edx
vpsrld $10,%xmm1,%xmm6
rorl $2,%r14d
addl %esi,%r11d
vpaddd %xmm4,%xmm2,%xmm2
movl %edx,%r13d
addl %r11d,%r14d
.byte 143,232,120,194,239,2
rorl $14,%r13d
movl %r14d,%r11d
vpxor %xmm6,%xmm7,%xmm7
movl %r8d,%r12d
xorl %edx,%r13d
rorl $9,%r14d
xorl %r9d,%r12d
vpxor %xmm5,%xmm7,%xmm7
rorl $5,%r13d
xorl %r11d,%r14d
andl %edx,%r12d
/* AES round with the key at key+128; fetch key+144 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 144-128(%rdi),%xmm10
xorl %edx,%r13d
vpsrldq $8,%xmm7,%xmm7
addl 36(%rsp),%r10d
movl %r11d,%esi
rorl $11,%r14d
xorl %r9d,%r12d
vpaddd %xmm7,%xmm2,%xmm2
xorl %eax,%esi
rorl $6,%r13d
addl %r12d,%r10d
andl %esi,%r15d
.byte 143,232,120,194,250,13
xorl %r11d,%r14d
addl %r13d,%r10d
vpsrld $10,%xmm2,%xmm6
xorl %eax,%r15d
addl %r10d,%ecx
.byte 143,232,120,194,239,2
rorl $2,%r14d
addl %r15d,%r10d
vpxor %xmm6,%xmm7,%xmm7
movl %ecx,%r13d
addl %r10d,%r14d
rorl $14,%r13d
movl %r14d,%r10d
vpxor %xmm5,%xmm7,%xmm7
movl %edx,%r12d
xorl %ecx,%r13d
rorl $9,%r14d
xorl %r8d,%r12d
vpslldq $8,%xmm7,%xmm7
rorl $5,%r13d
xorl %r10d,%r14d
andl %ecx,%r12d
/* AES round with the key at key+144; fetch key+160 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 160-128(%rdi),%xmm10
xorl %ecx,%r13d
vpaddd %xmm7,%xmm2,%xmm2
addl 40(%rsp),%r9d
movl %r10d,%r15d
rorl $11,%r14d
xorl %r8d,%r12d
vpaddd 64(%rbp),%xmm2,%xmm6
xorl %r11d,%r15d
rorl $6,%r13d
addl %r12d,%r9d
andl %r15d,%esi
xorl %r10d,%r14d
addl %r13d,%r9d
xorl %r11d,%esi
addl %r9d,%ebx
rorl $2,%r14d
addl %esi,%r9d
movl %ebx,%r13d
addl %r9d,%r14d
rorl $14,%r13d
movl %r14d,%r9d
movl %ecx,%r12d
xorl %ebx,%r13d
rorl $9,%r14d
xorl %edx,%r12d
rorl $5,%r13d
xorl %r9d,%r14d
andl %ebx,%r12d
/* candidate final block using the key at key+160 (kept only if xmm12 is all-ones),
   and in parallel continue as a normal round for longer key schedules */
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 176-128(%rdi),%xmm10
xorl %ebx,%r13d
addl 44(%rsp),%r8d
movl %r9d,%esi
rorl $11,%r14d
xorl %edx,%r12d
xorl %r10d,%esi
rorl $6,%r13d
addl %r12d,%r8d
andl %esi,%r15d
xorl %r9d,%r14d
addl %r13d,%r8d
xorl %r10d,%r15d
addl %r8d,%eax
rorl $2,%r14d
addl %r15d,%r8d
movl %eax,%r13d
addl %r8d,%r14d
/* ---- rounds t+12..t+15: store, then schedule xmm3 ---- */
vmovdqa %xmm6,32(%rsp)
vpalignr $4,%xmm3,%xmm0,%xmm4
rorl $14,%r13d
movl %r14d,%r8d
vpalignr $4,%xmm1,%xmm2,%xmm7
movl %ebx,%r12d
xorl %eax,%r13d
.byte 143,232,120,194,236,14
rorl $9,%r14d
xorl %ecx,%r12d
vpsrld $3,%xmm4,%xmm4
rorl $5,%r13d
xorl %r8d,%r14d
vpaddd %xmm7,%xmm3,%xmm3
andl %eax,%r12d
/* xmm8 = first candidate, masked; AES round with the key at key+176; fetch key+192 */
vpand %xmm12,%xmm11,%xmm8
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 192-128(%rdi),%xmm10
xorl %eax,%r13d
addl 48(%rsp),%edx
movl %r8d,%r15d
.byte 143,232,120,194,245,11
rorl $11,%r14d
xorl %ecx,%r12d
vpxor %xmm5,%xmm4,%xmm4
xorl %r9d,%r15d
rorl $6,%r13d
addl %r12d,%edx
andl %r15d,%esi
.byte 143,232,120,194,250,13
xorl %r8d,%r14d
addl %r13d,%edx
vpxor %xmm6,%xmm4,%xmm4
xorl %r9d,%esi
addl %edx,%r11d
vpsrld $10,%xmm2,%xmm6
rorl $2,%r14d
addl %esi,%edx
vpaddd %xmm4,%xmm3,%xmm3
movl %r11d,%r13d
addl %edx,%r14d
.byte 143,232,120,194,239,2
rorl $14,%r13d
movl %r14d,%edx
vpxor %xmm6,%xmm7,%xmm7
movl %eax,%r12d
xorl %r11d,%r13d
rorl $9,%r14d
xorl %ebx,%r12d
vpxor %xmm5,%xmm7,%xmm7
rorl $5,%r13d
xorl %edx,%r14d
andl %r11d,%r12d
/* second candidate final block using the key at key+192 (selected by xmm13) */
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 208-128(%rdi),%xmm10
xorl %r11d,%r13d
vpsrldq $8,%xmm7,%xmm7
addl 52(%rsp),%ecx
movl %edx,%esi
rorl $11,%r14d
xorl %ebx,%r12d
vpaddd %xmm7,%xmm3,%xmm3
xorl %r8d,%esi
rorl $6,%r13d
addl %r12d,%ecx
andl %esi,%r15d
.byte 143,232,120,194,251,13
xorl %edx,%r14d
addl %r13d,%ecx
vpsrld $10,%xmm3,%xmm6
xorl %r8d,%r15d
addl %ecx,%r10d
.byte 143,232,120,194,239,2
rorl $2,%r14d
addl %r15d,%ecx
vpxor %xmm6,%xmm7,%xmm7
movl %r10d,%r13d
addl %ecx,%r14d
rorl $14,%r13d
movl %r14d,%ecx
vpxor %xmm5,%xmm7,%xmm7
movl %r11d,%r12d
xorl %r10d,%r13d
rorl $9,%r14d
xorl %eax,%r12d
vpslldq $8,%xmm7,%xmm7
rorl $5,%r13d
xorl %ecx,%r14d
andl %r10d,%r12d
/* mask the second candidate; AES round with the key at key+208; fetch key+224 */
vpand %xmm13,%xmm11,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 224-128(%rdi),%xmm10
xorl %r10d,%r13d
vpaddd %xmm7,%xmm3,%xmm3
addl 56(%rsp),%ebx
movl %ecx,%r15d
rorl $11,%r14d
xorl %eax,%r12d
vpaddd 96(%rbp),%xmm3,%xmm6
xorl %edx,%r15d
rorl $6,%r13d
addl %r12d,%ebx
andl %r15d,%esi
xorl %ecx,%r14d
addl %r13d,%ebx
xorl %edx,%esi
addl %ebx,%r9d
rorl $2,%r14d
addl %esi,%ebx
movl %r9d,%r13d
addl %ebx,%r14d
rorl $14,%r13d
movl %r14d,%ebx
movl %r10d,%r12d
xorl %r9d,%r13d
rorl $9,%r14d
xorl %r11d,%r12d
rorl $5,%r13d
xorl %ebx,%r14d
andl %r9d,%r12d
/* merge candidates; third candidate uses the key at key+224 (selected by xmm14);
   then reload round key 0 for the next AES block */
vpor %xmm11,%xmm8,%xmm8
vaesenclast %xmm10,%xmm9,%xmm11
vmovdqu 0-128(%rdi),%xmm10
xorl %r9d,%r13d
addl 60(%rsp),%eax
movl %ebx,%esi
rorl $11,%r14d
xorl %r11d,%r12d
xorl %ecx,%esi
rorl $6,%r13d
addl %r12d,%eax
andl %esi,%r15d
xorl %ebx,%r14d
addl %r13d,%eax
xorl %ecx,%r15d
addl %eax,%r8d
rorl $2,%r14d
addl %r15d,%eax
movl %r8d,%r13d
addl %eax,%r14d
vmovdqa %xmm6,48(%rsp)
/* restore the input cursor; r15 = output-minus-input offset */
movq 64+0(%rsp),%r12
vpand %xmm14,%xmm11,%xmm11
movq 64+8(%rsp),%r15
/* xmm8 = ciphertext block; it is also the next CBC chaining value */
vpor %xmm11,%xmm8,%xmm8
vmovdqu %xmm8,(%r15,%r12,1)
leaq 16(%r12),%r12
/* loop while byte 131 past the constant cursor is non-zero: it is the top
   byte of a round constant on the first two passes and the zero byte of
   the byte-reversal mask (K256+515) on the third */
cmpb $0,131(%rbp)
jne .Lxop_00_47
/*
 * Fall-through from .Lxop_00_47: the last 16 SHA-256 rounds of the current
 * 64-byte block.  No message scheduling is needed any more; the rounds
 * just consume the W[t]+K[t] values already stored at 0..60(%rsp).  They
 * are interleaved with the AES-CBC encryption of one more 16-byte block,
 * using the same round-key / candidate-selection sequence as above.
 * Afterwards the working variables are added into the hash state and the
 * outer loop repeats until the input cursor reaches the stored end pointer.
 */
/* xmm9 = next 16-byte AES input block; park the cursor (r12 is scratch below) */
vmovdqu (%r12),%xmm9
movq %r12,64+0(%rsp)
rorl $14,%r13d
movl %r14d,%eax
movl %r9d,%r12d
xorl %r8d,%r13d
rorl $9,%r14d
xorl %r10d,%r12d
rorl $5,%r13d
xorl %eax,%r14d
andl %r8d,%r12d
/* AES: xor in round key 0, then fetch the round key at key+16 */
vpxor %xmm10,%xmm9,%xmm9
vmovdqu 16-128(%rdi),%xmm10
xorl %r8d,%r13d
addl 0(%rsp),%r11d
movl %eax,%r15d
rorl $11,%r14d
xorl %r10d,%r12d
xorl %ebx,%r15d
rorl $6,%r13d
addl %r12d,%r11d
andl %r15d,%esi
xorl %eax,%r14d
addl %r13d,%r11d
xorl %ebx,%esi
addl %r11d,%edx
rorl $2,%r14d
addl %esi,%r11d
movl %edx,%r13d
addl %r11d,%r14d
rorl $14,%r13d
movl %r14d,%r11d
movl %r8d,%r12d
xorl %edx,%r13d
rorl $9,%r14d
xorl %r9d,%r12d
rorl $5,%r13d
xorl %r11d,%r14d
andl %edx,%r12d
/* AES-CBC: xor in the previous ciphertext block */
vpxor %xmm8,%xmm9,%xmm9
xorl %edx,%r13d
addl 4(%rsp),%r10d
movl %r11d,%esi
rorl $11,%r14d
xorl %r9d,%r12d
xorl %eax,%esi
rorl $6,%r13d
addl %r12d,%r10d
andl %esi,%r15d
xorl %r11d,%r14d
addl %r13d,%r10d
xorl %eax,%r15d
addl %r10d,%ecx
rorl $2,%r14d
addl %r15d,%r10d
movl %ecx,%r13d
addl %r10d,%r14d
rorl $14,%r13d
movl %r14d,%r10d
movl %edx,%r12d
xorl %ecx,%r13d
rorl $9,%r14d
xorl %r8d,%r12d
rorl $5,%r13d
xorl %r10d,%r14d
andl %ecx,%r12d
/* AES round with the key at key+16; fetch key+32 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 32-128(%rdi),%xmm10
xorl %ecx,%r13d
addl 8(%rsp),%r9d
movl %r10d,%r15d
rorl $11,%r14d
xorl %r8d,%r12d
xorl %r11d,%r15d
rorl $6,%r13d
addl %r12d,%r9d
andl %r15d,%esi
xorl %r10d,%r14d
addl %r13d,%r9d
xorl %r11d,%esi
addl %r9d,%ebx
rorl $2,%r14d
addl %esi,%r9d
movl %ebx,%r13d
addl %r9d,%r14d
rorl $14,%r13d
movl %r14d,%r9d
movl %ecx,%r12d
xorl %ebx,%r13d
rorl $9,%r14d
xorl %edx,%r12d
rorl $5,%r13d
xorl %r9d,%r14d
andl %ebx,%r12d
/* AES round with the key at key+32; fetch key+48 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 48-128(%rdi),%xmm10
xorl %ebx,%r13d
addl 12(%rsp),%r8d
movl %r9d,%esi
rorl $11,%r14d
xorl %edx,%r12d
xorl %r10d,%esi
rorl $6,%r13d
addl %r12d,%r8d
andl %esi,%r15d
xorl %r9d,%r14d
addl %r13d,%r8d
xorl %r10d,%r15d
addl %r8d,%eax
rorl $2,%r14d
addl %r15d,%r8d
movl %eax,%r13d
addl %r8d,%r14d
rorl $14,%r13d
movl %r14d,%r8d
movl %ebx,%r12d
xorl %eax,%r13d
rorl $9,%r14d
xorl %ecx,%r12d
rorl $5,%r13d
xorl %r8d,%r14d
andl %eax,%r12d
/* AES round with the key at key+48; fetch key+64 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 64-128(%rdi),%xmm10
xorl %eax,%r13d
addl 16(%rsp),%edx
movl %r8d,%r15d
rorl $11,%r14d
xorl %ecx,%r12d
xorl %r9d,%r15d
rorl $6,%r13d
addl %r12d,%edx
andl %r15d,%esi
xorl %r8d,%r14d
addl %r13d,%edx
xorl %r9d,%esi
addl %edx,%r11d
rorl $2,%r14d
addl %esi,%edx
movl %r11d,%r13d
addl %edx,%r14d
rorl $14,%r13d
movl %r14d,%edx
movl %eax,%r12d
xorl %r11d,%r13d
rorl $9,%r14d
xorl %ebx,%r12d
rorl $5,%r13d
xorl %edx,%r14d
andl %r11d,%r12d
/* AES round with the key at key+64; fetch key+80 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 80-128(%rdi),%xmm10
xorl %r11d,%r13d
addl 20(%rsp),%ecx
movl %edx,%esi
rorl $11,%r14d
xorl %ebx,%r12d
xorl %r8d,%esi
rorl $6,%r13d
addl %r12d,%ecx
andl %esi,%r15d
xorl %edx,%r14d
addl %r13d,%ecx
xorl %r8d,%r15d
addl %ecx,%r10d
rorl $2,%r14d
addl %r15d,%ecx
movl %r10d,%r13d
addl %ecx,%r14d
rorl $14,%r13d
movl %r14d,%ecx
movl %r11d,%r12d
xorl %r10d,%r13d
rorl $9,%r14d
xorl %eax,%r12d
rorl $5,%r13d
xorl %ecx,%r14d
andl %r10d,%r12d
/* AES round with the key at key+80; fetch key+96 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 96-128(%rdi),%xmm10
xorl %r10d,%r13d
addl 24(%rsp),%ebx
movl %ecx,%r15d
rorl $11,%r14d
xorl %eax,%r12d
xorl %edx,%r15d
rorl $6,%r13d
addl %r12d,%ebx
andl %r15d,%esi
xorl %ecx,%r14d
addl %r13d,%ebx
xorl %edx,%esi
addl %ebx,%r9d
rorl $2,%r14d
addl %esi,%ebx
movl %r9d,%r13d
addl %ebx,%r14d
rorl $14,%r13d
movl %r14d,%ebx
movl %r10d,%r12d
xorl %r9d,%r13d
rorl $9,%r14d
xorl %r11d,%r12d
rorl $5,%r13d
xorl %ebx,%r14d
andl %r9d,%r12d
/* AES round with the key at key+96; fetch key+112 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 112-128(%rdi),%xmm10
xorl %r9d,%r13d
addl 28(%rsp),%eax
movl %ebx,%esi
rorl $11,%r14d
xorl %r11d,%r12d
xorl %ecx,%esi
rorl $6,%r13d
addl %r12d,%eax
andl %esi,%r15d
xorl %ebx,%r14d
addl %r13d,%eax
xorl %ecx,%r15d
addl %eax,%r8d
rorl $2,%r14d
addl %r15d,%eax
movl %r8d,%r13d
addl %eax,%r14d
rorl $14,%r13d
movl %r14d,%eax
movl %r9d,%r12d
xorl %r8d,%r13d
rorl $9,%r14d
xorl %r10d,%r12d
rorl $5,%r13d
xorl %eax,%r14d
andl %r8d,%r12d
/* AES round with the key at key+112; fetch key+128 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 128-128(%rdi),%xmm10
xorl %r8d,%r13d
addl 32(%rsp),%r11d
movl %eax,%r15d
rorl $11,%r14d
xorl %r10d,%r12d
xorl %ebx,%r15d
rorl $6,%r13d
addl %r12d,%r11d
andl %r15d,%esi
xorl %eax,%r14d
addl %r13d,%r11d
xorl %ebx,%esi
addl %r11d,%edx
rorl $2,%r14d
addl %esi,%r11d
movl %edx,%r13d
addl %r11d,%r14d
rorl $14,%r13d
movl %r14d,%r11d
movl %r8d,%r12d
xorl %edx,%r13d
rorl $9,%r14d
xorl %r9d,%r12d
rorl $5,%r13d
xorl %r11d,%r14d
andl %edx,%r12d
/* AES round with the key at key+128; fetch key+144 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 144-128(%rdi),%xmm10
xorl %edx,%r13d
addl 36(%rsp),%r10d
movl %r11d,%esi
rorl $11,%r14d
xorl %r9d,%r12d
xorl %eax,%esi
rorl $6,%r13d
addl %r12d,%r10d
andl %esi,%r15d
xorl %r11d,%r14d
addl %r13d,%r10d
xorl %eax,%r15d
addl %r10d,%ecx
rorl $2,%r14d
addl %r15d,%r10d
movl %ecx,%r13d
addl %r10d,%r14d
rorl $14,%r13d
movl %r14d,%r10d
movl %edx,%r12d
xorl %ecx,%r13d
rorl $9,%r14d
xorl %r8d,%r12d
rorl $5,%r13d
xorl %r10d,%r14d
andl %ecx,%r12d
/* AES round with the key at key+144; fetch key+160 */
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 160-128(%rdi),%xmm10
xorl %ecx,%r13d
addl 40(%rsp),%r9d
movl %r10d,%r15d
rorl $11,%r14d
xorl %r8d,%r12d
xorl %r11d,%r15d
rorl $6,%r13d
addl %r12d,%r9d
andl %r15d,%esi
xorl %r10d,%r14d
addl %r13d,%r9d
xorl %r11d,%esi
addl %r9d,%ebx
rorl $2,%r14d
addl %esi,%r9d
movl %ebx,%r13d
addl %r9d,%r14d
rorl $14,%r13d
movl %r14d,%r9d
movl %ecx,%r12d
xorl %ebx,%r13d
rorl $9,%r14d
xorl %edx,%r12d
rorl $5,%r13d
xorl %r9d,%r14d
andl %ebx,%r12d
/* first candidate final block (key at key+160, selected by xmm12); keep going in parallel */
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 176-128(%rdi),%xmm10
xorl %ebx,%r13d
addl 44(%rsp),%r8d
movl %r9d,%esi
rorl $11,%r14d
xorl %edx,%r12d
xorl %r10d,%esi
rorl $6,%r13d
addl %r12d,%r8d
andl %esi,%r15d
xorl %r9d,%r14d
addl %r13d,%r8d
xorl %r10d,%r15d
addl %r8d,%eax
rorl $2,%r14d
addl %r15d,%r8d
movl %eax,%r13d
addl %r8d,%r14d
rorl $14,%r13d
movl %r14d,%r8d
movl %ebx,%r12d
xorl %eax,%r13d
rorl $9,%r14d
xorl %ecx,%r12d
rorl $5,%r13d
xorl %r8d,%r14d
andl %eax,%r12d
/* xmm8 = first candidate, masked; AES round with the key at key+176; fetch key+192 */
vpand %xmm12,%xmm11,%xmm8
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 192-128(%rdi),%xmm10
xorl %eax,%r13d
addl 48(%rsp),%edx
movl %r8d,%r15d
rorl $11,%r14d
xorl %ecx,%r12d
xorl %r9d,%r15d
rorl $6,%r13d
addl %r12d,%edx
andl %r15d,%esi
xorl %r8d,%r14d
addl %r13d,%edx
xorl %r9d,%esi
addl %edx,%r11d
rorl $2,%r14d
addl %esi,%edx
movl %r11d,%r13d
addl %edx,%r14d
rorl $14,%r13d
movl %r14d,%edx
movl %eax,%r12d
xorl %r11d,%r13d
rorl $9,%r14d
xorl %ebx,%r12d
rorl $5,%r13d
xorl %edx,%r14d
andl %r11d,%r12d
/* second candidate final block (key at key+192, selected by xmm13) */
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 208-128(%rdi),%xmm10
xorl %r11d,%r13d
addl 52(%rsp),%ecx
movl %edx,%esi
rorl $11,%r14d
xorl %ebx,%r12d
xorl %r8d,%esi
rorl $6,%r13d
addl %r12d,%ecx
andl %esi,%r15d
xorl %edx,%r14d
addl %r13d,%ecx
xorl %r8d,%r15d
addl %ecx,%r10d
rorl $2,%r14d
addl %r15d,%ecx
movl %r10d,%r13d
addl %ecx,%r14d
rorl $14,%r13d
movl %r14d,%ecx
movl %r11d,%r12d
xorl %r10d,%r13d
rorl $9,%r14d
xorl %eax,%r12d
rorl $5,%r13d
xorl %ecx,%r14d
andl %r10d,%r12d
/* mask the second candidate; AES round with the key at key+208; fetch key+224 */
vpand %xmm13,%xmm11,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 224-128(%rdi),%xmm10
xorl %r10d,%r13d
addl 56(%rsp),%ebx
movl %ecx,%r15d
rorl $11,%r14d
xorl %eax,%r12d
xorl %edx,%r15d
rorl $6,%r13d
addl %r12d,%ebx
andl %r15d,%esi
xorl %ecx,%r14d
addl %r13d,%ebx
xorl %edx,%esi
addl %ebx,%r9d
rorl $2,%r14d
addl %esi,%ebx
movl %r9d,%r13d
addl %ebx,%r14d
rorl $14,%r13d
movl %r14d,%ebx
movl %r10d,%r12d
xorl %r9d,%r13d
rorl $9,%r14d
xorl %r11d,%r12d
rorl $5,%r13d
xorl %ebx,%r14d
andl %r9d,%r12d
/* merge candidates; third candidate uses the key at key+224 (selected by xmm14);
   reload round key 0 for the next AES block */
vpor %xmm11,%xmm8,%xmm8
vaesenclast %xmm10,%xmm9,%xmm11
vmovdqu 0-128(%rdi),%xmm10
xorl %r9d,%r13d
addl 60(%rsp),%eax
movl %ebx,%esi
rorl $11,%r14d
xorl %r11d,%r12d
xorl %ecx,%esi
rorl $6,%r13d
addl %r12d,%eax
andl %esi,%r15d
xorl %ebx,%r14d
addl %r13d,%eax
xorl %ecx,%r15d
addl %eax,%r8d
rorl $2,%r14d
addl %r15d,%eax
movl %r8d,%r13d
addl %eax,%r14d
/* ---- end of the 64 rounds: reload the loop bookkeeping from the frame ---- */
/* r12 = input cursor, r13 = output-minus-input, r15 = hash state, rsi = hashed-data-minus-input */
movq 64+0(%rsp),%r12
movq 64+8(%rsp),%r13
movq 64+40(%rsp),%r15
movq 64+48(%rsp),%rsi
vpand %xmm14,%xmm11,%xmm11
/* the first working variable was still pending in r14d */
movl %r14d,%eax
/* xmm8 = ciphertext block, also the next CBC chaining value */
vpor %xmm11,%xmm8,%xmm8
vmovdqu %xmm8,(%r12,%r13,1)
leaq 16(%r12),%r12
/* add the previous hash state into the working variables */
addl 0(%r15),%eax
addl 4(%r15),%ebx
addl 8(%r15),%ecx
addl 12(%r15),%edx
addl 16(%r15),%r8d
addl 20(%r15),%r9d
addl 24(%r15),%r10d
addl 28(%r15),%r11d
/* compare the cursor with the end pointer; the mov stores below leave the flags intact for jb */
cmpq 64+16(%rsp),%r12
movl %eax,0(%r15)
movl %ebx,4(%r15)
movl %ecx,8(%r15)
movl %edx,12(%r15)
movl %r8d,16(%r15)
movl %r9d,20(%r15)
movl %r10d,24(%r15)
movl %r11d,28(%r15)
jb .Lloop_xop
/*
 * Epilogue of aesni_cbc_sha256_enc_xop: write the last ciphertext block
 * back through the IV pointer, clear the vector registers, restore the
 * callee-saved registers from below the saved entry %rsp, and return.
 */
/* r8 = IV pointer, rsi = %rsp value at function entry */
movq 64+32(%rsp),%r8
movq 120(%rsp),%rsi
.cfi_def_cfa %rsi,8
/* the last ciphertext block becomes the caller's new IV */
vmovdqu %xmm8,(%r8)
/* zero all vector registers (they held key and data material) */
vzeroall
/* registers were pushed in the order rbx,rbp,r12..r15 just below the entry %rsp */
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
/* drop the aligned frame by restoring the entry %rsp */
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_xop:
/* rep ret */
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_cbc_sha256_enc_xop,.-aesni_cbc_sha256_enc_xop
.type aesni_cbc_sha256_enc_avx,@function
.align 64
aesni_cbc_sha256_enc_avx:
.cfi_startproc
.Lavx_shortcut:
movq 8(%rsp),%r10
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
subq $128,%rsp
andq $-64,%rsp
shlq $6,%rdx
subq %rdi,%rsi
subq %rdi,%r10
addq %rdi,%rdx
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
movq %r8,64+32(%rsp)
movq %r9,64+40(%rsp)
movq %r10,64+48(%rsp)
movq %rax,120(%rsp)
.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
.Lprologue_avx:
vzeroall
movq %rdi,%r12
leaq 128(%rcx),%rdi
leaq K256+544(%rip),%r13
movl 240-128(%rdi),%r14d
movq %r9,%r15
movq %r10,%rsi
vmovdqu (%r8),%xmm8
subq $9,%r14
movl 0(%r15),%eax
movl 4(%r15),%ebx
movl 8(%r15),%ecx
movl 12(%r15),%edx
movl 16(%r15),%r8d
movl 20(%r15),%r9d
movl 24(%r15),%r10d
movl 28(%r15),%r11d
vmovdqa 0(%r13,%r14,8),%xmm14
vmovdqa 16(%r13,%r14,8),%xmm13
vmovdqa 32(%r13,%r14,8),%xmm12
vmovdqu 0-128(%rdi),%xmm10
jmp .Lloop_avx
.align 16
.Lloop_avx:
vmovdqa K256+512(%rip),%xmm7
vmovdqu 0(%rsi,%r12,1),%xmm0
vmovdqu 16(%rsi,%r12,1),%xmm1
vmovdqu 32(%rsi,%r12,1),%xmm2
vmovdqu 48(%rsi,%r12,1),%xmm3
vpshufb %xmm7,%xmm0,%xmm0
leaq K256(%rip),%rbp
vpshufb %xmm7,%xmm1,%xmm1
vpshufb %xmm7,%xmm2,%xmm2
vpaddd 0(%rbp),%xmm0,%xmm4
vpshufb %xmm7,%xmm3,%xmm3
vpaddd 32(%rbp),%xmm1,%xmm5
vpaddd 64(%rbp),%xmm2,%xmm6
vpaddd 96(%rbp),%xmm3,%xmm7
vmovdqa %xmm4,0(%rsp)
movl %eax,%r14d
vmovdqa %xmm5,16(%rsp)
movl %ebx,%esi
vmovdqa %xmm6,32(%rsp)
xorl %ecx,%esi
vmovdqa %xmm7,48(%rsp)
movl %r8d,%r13d
jmp .Lavx_00_47
.align 16
.Lavx_00_47:
subq $-32*4,%rbp
vmovdqu (%r12),%xmm9
movq %r12,64+0(%rsp)
vpalignr $4,%xmm0,%xmm1,%xmm4
shrdl $14,%r13d,%r13d
movl %r14d,%eax
movl %r9d,%r12d
vpalignr $4,%xmm2,%xmm3,%xmm7
xorl %r8d,%r13d
shrdl $9,%r14d,%r14d
xorl %r10d,%r12d
vpsrld $7,%xmm4,%xmm6
shrdl $5,%r13d,%r13d
xorl %eax,%r14d
andl %r8d,%r12d
vpaddd %xmm7,%xmm0,%xmm0
vpxor %xmm10,%xmm9,%xmm9
vmovdqu 16-128(%rdi),%xmm10
xorl %r8d,%r13d
addl 0(%rsp),%r11d
movl %eax,%r15d
vpsrld $3,%xmm4,%xmm7
shrdl $11,%r14d,%r14d
xorl %r10d,%r12d
xorl %ebx,%r15d
vpslld $14,%xmm4,%xmm5
shrdl $6,%r13d,%r13d
addl %r12d,%r11d
andl %r15d,%esi
vpxor %xmm6,%xmm7,%xmm4
xorl %eax,%r14d
addl %r13d,%r11d
xorl %ebx,%esi
vpshufd $250,%xmm3,%xmm7
addl %r11d,%edx
shrdl $2,%r14d,%r14d
addl %esi,%r11d
vpsrld $11,%xmm6,%xmm6
movl %edx,%r13d
addl %r11d,%r14d
shrdl $14,%r13d,%r13d
vpxor %xmm5,%xmm4,%xmm4
movl %r14d,%r11d
movl %r8d,%r12d
xorl %edx,%r13d
vpslld $11,%xmm5,%xmm5
shrdl $9,%r14d,%r14d
xorl %r9d,%r12d
shrdl $5,%r13d,%r13d
vpxor %xmm6,%xmm4,%xmm4
xorl %r11d,%r14d
andl %edx,%r12d
vpxor %xmm8,%xmm9,%xmm9
xorl %edx,%r13d
vpsrld $10,%xmm7,%xmm6
addl 4(%rsp),%r10d
movl %r11d,%esi
shrdl $11,%r14d,%r14d
vpxor %xmm5,%xmm4,%xmm4
xorl %r9d,%r12d
xorl %eax,%esi
shrdl $6,%r13d,%r13d
vpsrlq $17,%xmm7,%xmm7
addl %r12d,%r10d
andl %esi,%r15d
xorl %r11d,%r14d
vpaddd %xmm4,%xmm0,%xmm0
addl %r13d,%r10d
xorl %eax,%r15d
addl %r10d,%ecx
vpxor %xmm7,%xmm6,%xmm6
shrdl $2,%r14d,%r14d
addl %r15d,%r10d
movl %ecx,%r13d
vpsrlq $2,%xmm7,%xmm7
addl %r10d,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%r10d
vpxor %xmm7,%xmm6,%xmm6
movl %edx,%r12d
xorl %ecx,%r13d
shrdl $9,%r14d,%r14d
vpshufd $132,%xmm6,%xmm6
xorl %r8d,%r12d
shrdl $5,%r13d,%r13d
xorl %r10d,%r14d
vpsrldq $8,%xmm6,%xmm6
andl %ecx,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 32-128(%rdi),%xmm10
xorl %ecx,%r13d
addl 8(%rsp),%r9d
vpaddd %xmm6,%xmm0,%xmm0
movl %r10d,%r15d
shrdl $11,%r14d,%r14d
xorl %r8d,%r12d
vpshufd $80,%xmm0,%xmm7
xorl %r11d,%r15d
shrdl $6,%r13d,%r13d
addl %r12d,%r9d
vpsrld $10,%xmm7,%xmm6
andl %r15d,%esi
xorl %r10d,%r14d
addl %r13d,%r9d
vpsrlq $17,%xmm7,%xmm7
xorl %r11d,%esi
addl %r9d,%ebx
shrdl $2,%r14d,%r14d
vpxor %xmm7,%xmm6,%xmm6
addl %esi,%r9d
movl %ebx,%r13d
addl %r9d,%r14d
vpsrlq $2,%xmm7,%xmm7
shrdl $14,%r13d,%r13d
movl %r14d,%r9d
movl %ecx,%r12d
vpxor %xmm7,%xmm6,%xmm6
xorl %ebx,%r13d
shrdl $9,%r14d,%r14d
xorl %edx,%r12d
vpshufd $232,%xmm6,%xmm6
shrdl $5,%r13d,%r13d
xorl %r9d,%r14d
andl %ebx,%r12d
vpslldq $8,%xmm6,%xmm6
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 48-128(%rdi),%xmm10
xorl %ebx,%r13d
addl 12(%rsp),%r8d
movl %r9d,%esi
vpaddd %xmm6,%xmm0,%xmm0
shrdl $11,%r14d,%r14d
xorl %edx,%r12d
xorl %r10d,%esi
vpaddd 0(%rbp),%xmm0,%xmm6
shrdl $6,%r13d,%r13d
addl %r12d,%r8d
andl %esi,%r15d
xorl %r9d,%r14d
addl %r13d,%r8d
xorl %r10d,%r15d
addl %r8d,%eax
shrdl $2,%r14d,%r14d
addl %r15d,%r8d
movl %eax,%r13d
addl %r8d,%r14d
vmovdqa %xmm6,0(%rsp)
vpalignr $4,%xmm1,%xmm2,%xmm4
shrdl $14,%r13d,%r13d
movl %r14d,%r8d
movl %ebx,%r12d
vpalignr $4,%xmm3,%xmm0,%xmm7
xorl %eax,%r13d
shrdl $9,%r14d,%r14d
xorl %ecx,%r12d
vpsrld $7,%xmm4,%xmm6
shrdl $5,%r13d,%r13d
xorl %r8d,%r14d
andl %eax,%r12d
vpaddd %xmm7,%xmm1,%xmm1
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 64-128(%rdi),%xmm10
xorl %eax,%r13d
addl 16(%rsp),%edx
movl %r8d,%r15d
vpsrld $3,%xmm4,%xmm7
shrdl $11,%r14d,%r14d
xorl %ecx,%r12d
xorl %r9d,%r15d
vpslld $14,%xmm4,%xmm5
shrdl $6,%r13d,%r13d
addl %r12d,%edx
andl %r15d,%esi
vpxor %xmm6,%xmm7,%xmm4
xorl %r8d,%r14d
addl %r13d,%edx
xorl %r9d,%esi
vpshufd $250,%xmm0,%xmm7
addl %edx,%r11d
shrdl $2,%r14d,%r14d
addl %esi,%edx
vpsrld $11,%xmm6,%xmm6
movl %r11d,%r13d
addl %edx,%r14d
shrdl $14,%r13d,%r13d
vpxor %xmm5,%xmm4,%xmm4
movl %r14d,%edx
movl %eax,%r12d
xorl %r11d,%r13d
vpslld $11,%xmm5,%xmm5
shrdl $9,%r14d,%r14d
xorl %ebx,%r12d
shrdl $5,%r13d,%r13d
vpxor %xmm6,%xmm4,%xmm4
xorl %edx,%r14d
andl %r11d,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 80-128(%rdi),%xmm10
xorl %r11d,%r13d
vpsrld $10,%xmm7,%xmm6
addl 20(%rsp),%ecx
movl %edx,%esi
shrdl $11,%r14d,%r14d
vpxor %xmm5,%xmm4,%xmm4
xorl %ebx,%r12d
xorl %r8d,%esi
shrdl $6,%r13d,%r13d
vpsrlq $17,%xmm7,%xmm7
addl %r12d,%ecx
andl %esi,%r15d
xorl %edx,%r14d
vpaddd %xmm4,%xmm1,%xmm1
addl %r13d,%ecx
xorl %r8d,%r15d
addl %ecx,%r10d
vpxor %xmm7,%xmm6,%xmm6
shrdl $2,%r14d,%r14d
addl %r15d,%ecx
movl %r10d,%r13d
vpsrlq $2,%xmm7,%xmm7
addl %ecx,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%ecx
vpxor %xmm7,%xmm6,%xmm6
movl %r11d,%r12d
xorl %r10d,%r13d
shrdl $9,%r14d,%r14d
vpshufd $132,%xmm6,%xmm6
xorl %eax,%r12d
shrdl $5,%r13d,%r13d
xorl %ecx,%r14d
vpsrldq $8,%xmm6,%xmm6
andl %r10d,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 96-128(%rdi),%xmm10
xorl %r10d,%r13d
addl 24(%rsp),%ebx
vpaddd %xmm6,%xmm1,%xmm1
movl %ecx,%r15d
shrdl $11,%r14d,%r14d
xorl %eax,%r12d
vpshufd $80,%xmm1,%xmm7
xorl %edx,%r15d
shrdl $6,%r13d,%r13d
addl %r12d,%ebx
vpsrld $10,%xmm7,%xmm6
andl %r15d,%esi
xorl %ecx,%r14d
addl %r13d,%ebx
vpsrlq $17,%xmm7,%xmm7
xorl %edx,%esi
addl %ebx,%r9d
shrdl $2,%r14d,%r14d
vpxor %xmm7,%xmm6,%xmm6
addl %esi,%ebx
movl %r9d,%r13d
addl %ebx,%r14d
vpsrlq $2,%xmm7,%xmm7
shrdl $14,%r13d,%r13d
movl %r14d,%ebx
movl %r10d,%r12d
vpxor %xmm7,%xmm6,%xmm6
xorl %r9d,%r13d
shrdl $9,%r14d,%r14d
xorl %r11d,%r12d
vpshufd $232,%xmm6,%xmm6
shrdl $5,%r13d,%r13d
xorl %ebx,%r14d
andl %r9d,%r12d
vpslldq $8,%xmm6,%xmm6
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 112-128(%rdi),%xmm10
xorl %r9d,%r13d
addl 28(%rsp),%eax
movl %ebx,%esi
vpaddd %xmm6,%xmm1,%xmm1
shrdl $11,%r14d,%r14d
xorl %r11d,%r12d
xorl %ecx,%esi
vpaddd 32(%rbp),%xmm1,%xmm6
shrdl $6,%r13d,%r13d
addl %r12d,%eax
andl %esi,%r15d
xorl %ebx,%r14d
addl %r13d,%eax
xorl %ecx,%r15d
addl %eax,%r8d
shrdl $2,%r14d,%r14d
addl %r15d,%eax
movl %r8d,%r13d
addl %eax,%r14d
vmovdqa %xmm6,16(%rsp)
vpalignr $4,%xmm2,%xmm3,%xmm4
shrdl $14,%r13d,%r13d
movl %r14d,%eax
movl %r9d,%r12d
vpalignr $4,%xmm0,%xmm1,%xmm7
xorl %r8d,%r13d
shrdl $9,%r14d,%r14d
xorl %r10d,%r12d
vpsrld $7,%xmm4,%xmm6
shrdl $5,%r13d,%r13d
xorl %eax,%r14d
andl %r8d,%r12d
vpaddd %xmm7,%xmm2,%xmm2
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 128-128(%rdi),%xmm10
xorl %r8d,%r13d
addl 32(%rsp),%r11d
movl %eax,%r15d
vpsrld $3,%xmm4,%xmm7
shrdl $11,%r14d,%r14d
xorl %r10d,%r12d
xorl %ebx,%r15d
vpslld $14,%xmm4,%xmm5
shrdl $6,%r13d,%r13d
addl %r12d,%r11d
andl %r15d,%esi
vpxor %xmm6,%xmm7,%xmm4
xorl %eax,%r14d
addl %r13d,%r11d
xorl %ebx,%esi
vpshufd $250,%xmm1,%xmm7
addl %r11d,%edx
shrdl $2,%r14d,%r14d
addl %esi,%r11d
vpsrld $11,%xmm6,%xmm6
movl %edx,%r13d
addl %r11d,%r14d
shrdl $14,%r13d,%r13d
vpxor %xmm5,%xmm4,%xmm4
movl %r14d,%r11d
movl %r8d,%r12d
xorl %edx,%r13d
vpslld $11,%xmm5,%xmm5
shrdl $9,%r14d,%r14d
xorl %r9d,%r12d
shrdl $5,%r13d,%r13d
vpxor %xmm6,%xmm4,%xmm4
xorl %r11d,%r14d
andl %edx,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 144-128(%rdi),%xmm10
xorl %edx,%r13d
vpsrld $10,%xmm7,%xmm6
addl 36(%rsp),%r10d
movl %r11d,%esi
shrdl $11,%r14d,%r14d
vpxor %xmm5,%xmm4,%xmm4
xorl %r9d,%r12d
xorl %eax,%esi
shrdl $6,%r13d,%r13d
vpsrlq $17,%xmm7,%xmm7
addl %r12d,%r10d
andl %esi,%r15d
xorl %r11d,%r14d
vpaddd %xmm4,%xmm2,%xmm2
addl %r13d,%r10d
xorl %eax,%r15d
addl %r10d,%ecx
vpxor %xmm7,%xmm6,%xmm6
shrdl $2,%r14d,%r14d
addl %r15d,%r10d
movl %ecx,%r13d
vpsrlq $2,%xmm7,%xmm7
addl %r10d,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%r10d
vpxor %xmm7,%xmm6,%xmm6
movl %edx,%r12d
xorl %ecx,%r13d
shrdl $9,%r14d,%r14d
vpshufd $132,%xmm6,%xmm6
xorl %r8d,%r12d
shrdl $5,%r13d,%r13d
xorl %r10d,%r14d
vpsrldq $8,%xmm6,%xmm6
andl %ecx,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 160-128(%rdi),%xmm10
xorl %ecx,%r13d
addl 40(%rsp),%r9d
vpaddd %xmm6,%xmm2,%xmm2
movl %r10d,%r15d
shrdl $11,%r14d,%r14d
xorl %r8d,%r12d
vpshufd $80,%xmm2,%xmm7
xorl %r11d,%r15d
shrdl $6,%r13d,%r13d
addl %r12d,%r9d
vpsrld $10,%xmm7,%xmm6
andl %r15d,%esi
xorl %r10d,%r14d
addl %r13d,%r9d
vpsrlq $17,%xmm7,%xmm7
xorl %r11d,%esi
addl %r9d,%ebx
shrdl $2,%r14d,%r14d
vpxor %xmm7,%xmm6,%xmm6
addl %esi,%r9d
movl %ebx,%r13d
addl %r9d,%r14d
vpsrlq $2,%xmm7,%xmm7
shrdl $14,%r13d,%r13d
movl %r14d,%r9d
movl %ecx,%r12d
vpxor %xmm7,%xmm6,%xmm6
xorl %ebx,%r13d
shrdl $9,%r14d,%r14d
xorl %edx,%r12d
vpshufd $232,%xmm6,%xmm6
shrdl $5,%r13d,%r13d
xorl %r9d,%r14d
andl %ebx,%r12d
vpslldq $8,%xmm6,%xmm6
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 176-128(%rdi),%xmm10
xorl %ebx,%r13d
addl 44(%rsp),%r8d
movl %r9d,%esi
vpaddd %xmm6,%xmm2,%xmm2
shrdl $11,%r14d,%r14d
xorl %edx,%r12d
xorl %r10d,%esi
vpaddd 64(%rbp),%xmm2,%xmm6
shrdl $6,%r13d,%r13d
addl %r12d,%r8d
andl %esi,%r15d
xorl %r9d,%r14d
addl %r13d,%r8d
xorl %r10d,%r15d
addl %r8d,%eax
shrdl $2,%r14d,%r14d
addl %r15d,%r8d
movl %eax,%r13d
addl %r8d,%r14d
vmovdqa %xmm6,32(%rsp)
vpalignr $4,%xmm3,%xmm0,%xmm4
shrdl $14,%r13d,%r13d
movl %r14d,%r8d
movl %ebx,%r12d
vpalignr $4,%xmm1,%xmm2,%xmm7
xorl %eax,%r13d
shrdl $9,%r14d,%r14d
xorl %ecx,%r12d
vpsrld $7,%xmm4,%xmm6
shrdl $5,%r13d,%r13d
xorl %r8d,%r14d
andl %eax,%r12d
vpaddd %xmm7,%xmm3,%xmm3
vpand %xmm12,%xmm11,%xmm8
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 192-128(%rdi),%xmm10
xorl %eax,%r13d
addl 48(%rsp),%edx
movl %r8d,%r15d
vpsrld $3,%xmm4,%xmm7
shrdl $11,%r14d,%r14d
xorl %ecx,%r12d
xorl %r9d,%r15d
vpslld $14,%xmm4,%xmm5
shrdl $6,%r13d,%r13d
addl %r12d,%edx
andl %r15d,%esi
vpxor %xmm6,%xmm7,%xmm4
xorl %r8d,%r14d
addl %r13d,%edx
xorl %r9d,%esi
vpshufd $250,%xmm2,%xmm7
addl %edx,%r11d
shrdl $2,%r14d,%r14d
addl %esi,%edx
vpsrld $11,%xmm6,%xmm6
movl %r11d,%r13d
addl %edx,%r14d
shrdl $14,%r13d,%r13d
vpxor %xmm5,%xmm4,%xmm4
movl %r14d,%edx
movl %eax,%r12d
xorl %r11d,%r13d
vpslld $11,%xmm5,%xmm5
shrdl $9,%r14d,%r14d
xorl %ebx,%r12d
shrdl $5,%r13d,%r13d
vpxor %xmm6,%xmm4,%xmm4
xorl %edx,%r14d
andl %r11d,%r12d
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 208-128(%rdi),%xmm10
xorl %r11d,%r13d
vpsrld $10,%xmm7,%xmm6
addl 52(%rsp),%ecx
movl %edx,%esi
shrdl $11,%r14d,%r14d
vpxor %xmm5,%xmm4,%xmm4
xorl %ebx,%r12d
xorl %r8d,%esi
shrdl $6,%r13d,%r13d
vpsrlq $17,%xmm7,%xmm7
addl %r12d,%ecx
andl %esi,%r15d
xorl %edx,%r14d
vpaddd %xmm4,%xmm3,%xmm3
addl %r13d,%ecx
xorl %r8d,%r15d
addl %ecx,%r10d
vpxor %xmm7,%xmm6,%xmm6
shrdl $2,%r14d,%r14d
addl %r15d,%ecx
movl %r10d,%r13d
vpsrlq $2,%xmm7,%xmm7
addl %ecx,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%ecx
vpxor %xmm7,%xmm6,%xmm6
movl %r11d,%r12d
xorl %r10d,%r13d
shrdl $9,%r14d,%r14d
vpshufd $132,%xmm6,%xmm6
xorl %eax,%r12d
shrdl $5,%r13d,%r13d
xorl %ecx,%r14d
vpsrldq $8,%xmm6,%xmm6
andl %r10d,%r12d
vpand %xmm13,%xmm11,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 224-128(%rdi),%xmm10
xorl %r10d,%r13d
addl 56(%rsp),%ebx
vpaddd %xmm6,%xmm3,%xmm3
movl %ecx,%r15d
shrdl $11,%r14d,%r14d
xorl %eax,%r12d
vpshufd $80,%xmm3,%xmm7
xorl %edx,%r15d
shrdl $6,%r13d,%r13d
addl %r12d,%ebx
vpsrld $10,%xmm7,%xmm6
andl %r15d,%esi
xorl %ecx,%r14d
addl %r13d,%ebx
vpsrlq $17,%xmm7,%xmm7
xorl %edx,%esi
addl %ebx,%r9d
shrdl $2,%r14d,%r14d
vpxor %xmm7,%xmm6,%xmm6
addl %esi,%ebx
movl %r9d,%r13d
addl %ebx,%r14d
vpsrlq $2,%xmm7,%xmm7
shrdl $14,%r13d,%r13d
movl %r14d,%ebx
movl %r10d,%r12d
vpxor %xmm7,%xmm6,%xmm6
xorl %r9d,%r13d
shrdl $9,%r14d,%r14d
xorl %r11d,%r12d
vpshufd $232,%xmm6,%xmm6
shrdl $5,%r13d,%r13d
xorl %ebx,%r14d
andl %r9d,%r12d
vpslldq $8,%xmm6,%xmm6
vpor %xmm11,%xmm8,%xmm8
vaesenclast %xmm10,%xmm9,%xmm11
vmovdqu 0-128(%rdi),%xmm10
xorl %r9d,%r13d
addl 60(%rsp),%eax
movl %ebx,%esi
vpaddd %xmm6,%xmm3,%xmm3
shrdl $11,%r14d,%r14d
xorl %r11d,%r12d
xorl %ecx,%esi
vpaddd 96(%rbp),%xmm3,%xmm6
shrdl $6,%r13d,%r13d
addl %r12d,%eax
andl %esi,%r15d
xorl %ebx,%r14d
addl %r13d,%eax
xorl %ecx,%r15d
addl %eax,%r8d
shrdl $2,%r14d,%r14d
addl %r15d,%eax
movl %r8d,%r13d
addl %eax,%r14d
vmovdqa %xmm6,48(%rsp)
movq 64+0(%rsp),%r12
vpand %xmm14,%xmm11,%xmm11
movq 64+8(%rsp),%r15
vpor %xmm11,%xmm8,%xmm8
vmovdqu %xmm8,(%r15,%r12,1)
leaq 16(%r12),%r12
cmpb $0,131(%rbp)
jne .Lavx_00_47
vmovdqu (%r12),%xmm9
movq %r12,64+0(%rsp)
shrdl $14,%r13d,%r13d
movl %r14d,%eax
movl %r9d,%r12d
xorl %r8d,%r13d
shrdl $9,%r14d,%r14d
xorl %r10d,%r12d
shrdl $5,%r13d,%r13d
xorl %eax,%r14d
andl %r8d,%r12d
vpxor %xmm10,%xmm9,%xmm9
vmovdqu 16-128(%rdi),%xmm10
xorl %r8d,%r13d
addl 0(%rsp),%r11d
movl %eax,%r15d
shrdl $11,%r14d,%r14d
xorl %r10d,%r12d
xorl %ebx,%r15d
shrdl $6,%r13d,%r13d
addl %r12d,%r11d
andl %r15d,%esi
xorl %eax,%r14d
addl %r13d,%r11d
xorl %ebx,%esi
addl %r11d,%edx
shrdl $2,%r14d,%r14d
addl %esi,%r11d
movl %edx,%r13d
addl %r11d,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%r11d
movl %r8d,%r12d
xorl %edx,%r13d
shrdl $9,%r14d,%r14d
xorl %r9d,%r12d
shrdl $5,%r13d,%r13d
xorl %r11d,%r14d
andl %edx,%r12d
vpxor %xmm8,%xmm9,%xmm9
xorl %edx,%r13d
addl 4(%rsp),%r10d
movl %r11d,%esi
shrdl $11,%r14d,%r14d
xorl %r9d,%r12d
xorl %eax,%esi
shrdl $6,%r13d,%r13d
addl %r12d,%r10d
andl %esi,%r15d
xorl %r11d,%r14d
addl %r13d,%r10d
xorl %eax,%r15d
addl %r10d,%ecx
shrdl $2,%r14d,%r14d
addl %r15d,%r10d
movl %ecx,%r13d
addl %r10d,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%r10d
movl %edx,%r12d
xorl %ecx,%r13d
shrdl $9,%r14d,%r14d
xorl %r8d,%r12d
shrdl $5,%r13d,%r13d
xorl %r10d,%r14d
andl %ecx,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 32-128(%rdi),%xmm10
xorl %ecx,%r13d
addl 8(%rsp),%r9d
movl %r10d,%r15d
shrdl $11,%r14d,%r14d
xorl %r8d,%r12d
xorl %r11d,%r15d
shrdl $6,%r13d,%r13d
addl %r12d,%r9d
andl %r15d,%esi
xorl %r10d,%r14d
addl %r13d,%r9d
xorl %r11d,%esi
addl %r9d,%ebx
shrdl $2,%r14d,%r14d
addl %esi,%r9d
movl %ebx,%r13d
addl %r9d,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%r9d
movl %ecx,%r12d
xorl %ebx,%r13d
shrdl $9,%r14d,%r14d
xorl %edx,%r12d
shrdl $5,%r13d,%r13d
xorl %r9d,%r14d
andl %ebx,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 48-128(%rdi),%xmm10
xorl %ebx,%r13d
addl 12(%rsp),%r8d
movl %r9d,%esi
shrdl $11,%r14d,%r14d
xorl %edx,%r12d
xorl %r10d,%esi
shrdl $6,%r13d,%r13d
addl %r12d,%r8d
andl %esi,%r15d
xorl %r9d,%r14d
addl %r13d,%r8d
xorl %r10d,%r15d
addl %r8d,%eax
shrdl $2,%r14d,%r14d
addl %r15d,%r8d
movl %eax,%r13d
addl %r8d,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%r8d
movl %ebx,%r12d
xorl %eax,%r13d
shrdl $9,%r14d,%r14d
xorl %ecx,%r12d
shrdl $5,%r13d,%r13d
xorl %r8d,%r14d
andl %eax,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 64-128(%rdi),%xmm10
xorl %eax,%r13d
addl 16(%rsp),%edx
movl %r8d,%r15d
shrdl $11,%r14d,%r14d
xorl %ecx,%r12d
xorl %r9d,%r15d
shrdl $6,%r13d,%r13d
addl %r12d,%edx
andl %r15d,%esi
xorl %r8d,%r14d
addl %r13d,%edx
xorl %r9d,%esi
addl %edx,%r11d
shrdl $2,%r14d,%r14d
addl %esi,%edx
movl %r11d,%r13d
addl %edx,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%edx
movl %eax,%r12d
xorl %r11d,%r13d
shrdl $9,%r14d,%r14d
xorl %ebx,%r12d
shrdl $5,%r13d,%r13d
xorl %edx,%r14d
andl %r11d,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 80-128(%rdi),%xmm10
xorl %r11d,%r13d
addl 20(%rsp),%ecx
movl %edx,%esi
shrdl $11,%r14d,%r14d
xorl %ebx,%r12d
xorl %r8d,%esi
shrdl $6,%r13d,%r13d
addl %r12d,%ecx
andl %esi,%r15d
xorl %edx,%r14d
addl %r13d,%ecx
xorl %r8d,%r15d
addl %ecx,%r10d
shrdl $2,%r14d,%r14d
addl %r15d,%ecx
movl %r10d,%r13d
addl %ecx,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%ecx
movl %r11d,%r12d
xorl %r10d,%r13d
shrdl $9,%r14d,%r14d
xorl %eax,%r12d
shrdl $5,%r13d,%r13d
xorl %ecx,%r14d
andl %r10d,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 96-128(%rdi),%xmm10
xorl %r10d,%r13d
addl 24(%rsp),%ebx
movl %ecx,%r15d
shrdl $11,%r14d,%r14d
xorl %eax,%r12d
xorl %edx,%r15d
shrdl $6,%r13d,%r13d
addl %r12d,%ebx
andl %r15d,%esi
xorl %ecx,%r14d
addl %r13d,%ebx
xorl %edx,%esi
addl %ebx,%r9d
shrdl $2,%r14d,%r14d
addl %esi,%ebx
movl %r9d,%r13d
addl %ebx,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%ebx
movl %r10d,%r12d
xorl %r9d,%r13d
shrdl $9,%r14d,%r14d
xorl %r11d,%r12d
shrdl $5,%r13d,%r13d
xorl %ebx,%r14d
andl %r9d,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 112-128(%rdi),%xmm10
xorl %r9d,%r13d
addl 28(%rsp),%eax
movl %ebx,%esi
shrdl $11,%r14d,%r14d
xorl %r11d,%r12d
xorl %ecx,%esi
shrdl $6,%r13d,%r13d
addl %r12d,%eax
andl %esi,%r15d
xorl %ebx,%r14d
addl %r13d,%eax
xorl %ecx,%r15d
addl %eax,%r8d
shrdl $2,%r14d,%r14d
addl %r15d,%eax
movl %r8d,%r13d
addl %eax,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%eax
movl %r9d,%r12d
xorl %r8d,%r13d
shrdl $9,%r14d,%r14d
xorl %r10d,%r12d
shrdl $5,%r13d,%r13d
xorl %eax,%r14d
andl %r8d,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 128-128(%rdi),%xmm10
xorl %r8d,%r13d
addl 32(%rsp),%r11d
movl %eax,%r15d
shrdl $11,%r14d,%r14d
xorl %r10d,%r12d
xorl %ebx,%r15d
shrdl $6,%r13d,%r13d
addl %r12d,%r11d
andl %r15d,%esi
xorl %eax,%r14d
addl %r13d,%r11d
xorl %ebx,%esi
addl %r11d,%edx
shrdl $2,%r14d,%r14d
addl %esi,%r11d
movl %edx,%r13d
addl %r11d,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%r11d
movl %r8d,%r12d
xorl %edx,%r13d
shrdl $9,%r14d,%r14d
xorl %r9d,%r12d
shrdl $5,%r13d,%r13d
xorl %r11d,%r14d
andl %edx,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 144-128(%rdi),%xmm10
xorl %edx,%r13d
addl 36(%rsp),%r10d
movl %r11d,%esi
shrdl $11,%r14d,%r14d
xorl %r9d,%r12d
xorl %eax,%esi
shrdl $6,%r13d,%r13d
addl %r12d,%r10d
andl %esi,%r15d
xorl %r11d,%r14d
addl %r13d,%r10d
xorl %eax,%r15d
addl %r10d,%ecx
shrdl $2,%r14d,%r14d
addl %r15d,%r10d
movl %ecx,%r13d
addl %r10d,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%r10d
movl %edx,%r12d
xorl %ecx,%r13d
shrdl $9,%r14d,%r14d
xorl %r8d,%r12d
shrdl $5,%r13d,%r13d
xorl %r10d,%r14d
andl %ecx,%r12d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 160-128(%rdi),%xmm10
xorl %ecx,%r13d
addl 40(%rsp),%r9d
movl %r10d,%r15d
shrdl $11,%r14d,%r14d
xorl %r8d,%r12d
xorl %r11d,%r15d
shrdl $6,%r13d,%r13d
addl %r12d,%r9d
andl %r15d,%esi
xorl %r10d,%r14d
addl %r13d,%r9d
xorl %r11d,%esi
addl %r9d,%ebx
shrdl $2,%r14d,%r14d
addl %esi,%r9d
movl %ebx,%r13d
addl %r9d,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%r9d
movl %ecx,%r12d
xorl %ebx,%r13d
shrdl $9,%r14d,%r14d
xorl %edx,%r12d
shrdl $5,%r13d,%r13d
xorl %r9d,%r14d
andl %ebx,%r12d
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 176-128(%rdi),%xmm10
xorl %ebx,%r13d
addl 44(%rsp),%r8d
movl %r9d,%esi
shrdl $11,%r14d,%r14d
xorl %edx,%r12d
xorl %r10d,%esi
shrdl $6,%r13d,%r13d
addl %r12d,%r8d
andl %esi,%r15d
xorl %r9d,%r14d
addl %r13d,%r8d
xorl %r10d,%r15d
addl %r8d,%eax
shrdl $2,%r14d,%r14d
addl %r15d,%r8d
movl %eax,%r13d
addl %r8d,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%r8d
movl %ebx,%r12d
xorl %eax,%r13d
shrdl $9,%r14d,%r14d
xorl %ecx,%r12d
shrdl $5,%r13d,%r13d
xorl %r8d,%r14d
andl %eax,%r12d
vpand %xmm12,%xmm11,%xmm8
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 192-128(%rdi),%xmm10
xorl %eax,%r13d
addl 48(%rsp),%edx
movl %r8d,%r15d
shrdl $11,%r14d,%r14d
xorl %ecx,%r12d
xorl %r9d,%r15d
shrdl $6,%r13d,%r13d
addl %r12d,%edx
andl %r15d,%esi
xorl %r8d,%r14d
addl %r13d,%edx
xorl %r9d,%esi
addl %edx,%r11d
shrdl $2,%r14d,%r14d
addl %esi,%edx
movl %r11d,%r13d
addl %edx,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%edx
movl %eax,%r12d
xorl %r11d,%r13d
shrdl $9,%r14d,%r14d
xorl %ebx,%r12d
shrdl $5,%r13d,%r13d
xorl %edx,%r14d
andl %r11d,%r12d
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 208-128(%rdi),%xmm10
xorl %r11d,%r13d
addl 52(%rsp),%ecx
movl %edx,%esi
shrdl $11,%r14d,%r14d
xorl %ebx,%r12d
xorl %r8d,%esi
shrdl $6,%r13d,%r13d
addl %r12d,%ecx
andl %esi,%r15d
xorl %edx,%r14d
addl %r13d,%ecx
xorl %r8d,%r15d
addl %ecx,%r10d
shrdl $2,%r14d,%r14d
addl %r15d,%ecx
movl %r10d,%r13d
addl %ecx,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%ecx
movl %r11d,%r12d
xorl %r10d,%r13d
shrdl $9,%r14d,%r14d
xorl %eax,%r12d
shrdl $5,%r13d,%r13d
xorl %ecx,%r14d
andl %r10d,%r12d
vpand %xmm13,%xmm11,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 224-128(%rdi),%xmm10
xorl %r10d,%r13d
addl 56(%rsp),%ebx
movl %ecx,%r15d
shrdl $11,%r14d,%r14d
xorl %eax,%r12d
xorl %edx,%r15d
shrdl $6,%r13d,%r13d
addl %r12d,%ebx
andl %r15d,%esi
xorl %ecx,%r14d
addl %r13d,%ebx
xorl %edx,%esi
addl %ebx,%r9d
shrdl $2,%r14d,%r14d
addl %esi,%ebx
movl %r9d,%r13d
addl %ebx,%r14d
shrdl $14,%r13d,%r13d
movl %r14d,%ebx
movl %r10d,%r12d
xorl %r9d,%r13d
shrdl $9,%r14d,%r14d
xorl %r11d,%r12d
shrdl $5,%r13d,%r13d
xorl %ebx,%r14d
andl %r9d,%r12d
vpor %xmm11,%xmm8,%xmm8
vaesenclast %xmm10,%xmm9,%xmm11
vmovdqu 0-128(%rdi),%xmm10
xorl %r9d,%r13d
addl 60(%rsp),%eax
movl %ebx,%esi
shrdl $11,%r14d,%r14d
xorl %r11d,%r12d
xorl %ecx,%esi
shrdl $6,%r13d,%r13d
addl %r12d,%eax
andl %esi,%r15d
xorl %ebx,%r14d
addl %r13d,%eax
xorl %ecx,%r15d
addl %eax,%r8d
shrdl $2,%r14d,%r14d
addl %r15d,%eax
movl %r8d,%r13d
addl %eax,%r14d
movq 64+0(%rsp),%r12
movq 64+8(%rsp),%r13
movq 64+40(%rsp),%r15
movq 64+48(%rsp),%rsi
vpand %xmm14,%xmm11,%xmm11
movl %r14d,%eax
vpor %xmm11,%xmm8,%xmm8
vmovdqu %xmm8,(%r12,%r13,1)
leaq 16(%r12),%r12
addl 0(%r15),%eax
addl 4(%r15),%ebx
addl 8(%r15),%ecx
addl 12(%r15),%edx
addl 16(%r15),%r8d
addl 20(%r15),%r9d
addl 24(%r15),%r10d
addl 28(%r15),%r11d
cmpq 64+16(%rsp),%r12
movl %eax,0(%r15)
movl %ebx,4(%r15)
movl %ecx,8(%r15)
movl %edx,12(%r15)
movl %r8d,16(%r15)
movl %r9d,20(%r15)
movl %r10d,24(%r15)
movl %r11d,28(%r15)
jb .Lloop_avx
movq 64+32(%rsp),%r8
movq 120(%rsp),%rsi
.cfi_def_cfa %rsi,8
vmovdqu %xmm8,(%r8)
vzeroall
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx:
/* The bytes F3 C3 are the machine encoding of `rep ret` (function return). */
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_cbc_sha256_enc_avx,.-aesni_cbc_sha256_enc_avx
/*
 * aesni_cbc_sha256_enc_avx2 -- entry, frame setup and state load.
 *
 * Inputs, as they are consumed by the code in this file:
 *   %rdi     cursor from which 16-byte blocks are loaded for AES
 *   %rsi     destination of the AES result (kept only as %rsi - %rdi)
 *   %rdx     number of 64-byte blocks (scaled by 64 and added to %rdi)
 *   %rcx     AES key schedule; a 32-bit value is read at offset 240
 *            (NOTE(review): presumably the round count -- confirm
 *            against the key structure definition)
 *   %r8      pointer to the 16-byte chaining value loaded into %xmm8
 *   %r9      pointer to eight 32-bit hash state words
 *   8(%rsp)  seventh argument: pointer to the data that is hashed
 *            (kept only as a displacement from %rdi)
 */
.type aesni_cbc_sha256_enc_avx2,@function
.align 64
aesni_cbc_sha256_enc_avx2:
.cfi_startproc
.Lavx2_shortcut:
/* Read the stack-passed argument while %rsp still has its entry value. */
movq 8(%rsp),%r10
/* %rax remembers the entry %rsp and serves as the CFA base from here on. */
movq %rsp,%rax
.cfi_def_cfa_register %rax
/* Save the callee-saved registers used below. */
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
/* Reserve the frame and leave %rsp at 448 modulo 1024. */
subq $576,%rsp
andq $-1024,%rsp
addq $448,%rsp
/*
 * %rdx becomes the end address %rdi + 64*blocks; %rsi and %r10 are turned
 * into displacements relative to %rdi.
 */
shlq $6,%rdx
subq %rdi,%rsi
subq %rdi,%r10
addq %rdi,%rdx
/* Spill values needed later; the entry %rsp goes to 120(%rsp). */
movq %rdx,64+16(%rsp)
movq %r8,64+32(%rsp)
movq %r9,64+40(%rsp)
movq %r10,64+48(%rsp)
movq %rax,120(%rsp)
/* DWARF expression: CFA = *(%rsp + 120) + 8. */
.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
.Lprologue_avx2:
vzeroall
/* %r13 = AES input cursor; high qword of %xmm15 = (destination - input). */
movq %rdi,%r13
vpinsrq $1,%rsi,%xmm15,%xmm15
/* Bias the key pointer by 128 so round keys are addressed as N-128(%rdi). */
leaq 128(%rcx),%rdi
leaq K256+544(%rip),%r12
/* 32-bit value at key offset 240. */
movl 240-128(%rdi),%r14d
movq %r9,%r15
movq %r10,%rsi
/* Initial chaining value for the CBC xor. */
vmovdqu (%r8),%xmm8
/*
 * Load three 16-byte masks from K256+544, indexed by (value - 9) * 8.
 * They are later ANDed with the three vaesenclast candidates.
 * NOTE(review): the mask contents are outside this excerpt; presumably
 * exactly one of them is all-ones for a given key size.
 */
leaq -9(%r14),%r14
vmovdqa 0(%r12,%r14,8),%xmm14
vmovdqa 16(%r12,%r14,8),%xmm13
vmovdqa 32(%r12,%r14,8),%xmm12
/*
 * Advance %r13 by 64 and load the eight state words from (%r15).
 * %r12 = hashed-data pointer + 64 (the following block), unless %r13 has
 * reached the end address in %rdx, in which case %r12 is pointed at %rsp.
 * The interleaved movl instructions do not modify the flags used by cmoveq;
 * %rdx is overwritten with a state word only after the compare.
 */
subq $-64,%r13
movl 0(%r15),%eax
leaq (%rsi,%r13,1),%r12
movl 4(%r15),%ebx
cmpq %rdx,%r13
movl 8(%r15),%ecx
cmoveq %rsp,%r12
movl 12(%r15),%edx
movl 16(%r15),%r8d
movl 20(%r15),%r9d
movl 24(%r15),%r10d
movl 28(%r15),%r11d
/* First AES round key (offset 0 of the key schedule). */
vmovdqu 0-128(%rdi),%xmm10
jmp .Loop_avx2
/*
 * .Loop_avx2: per-iteration setup before the round loop.
 * Loads 64 bytes from (%rsi,%r13) - 64 into the low lanes of %ymm0..%ymm3 and
 * 64 bytes from (%r12) into the high lanes, shuffles them with the mask at
 * K256+512, adds the first 128 bytes of K256 and stores the sums on the stack.
 */
.align 16
.Loop_avx2:
/* Shuffle mask for vpshufb (NOTE(review): presumably a byte-swap mask). */
vmovdqa K256+512(%rip),%ymm7
vmovdqu -64+0(%rsi,%r13,1),%xmm0
vmovdqu -64+16(%rsi,%r13,1),%xmm1
vmovdqu -64+32(%rsi,%r13,1),%xmm2
vmovdqu -64+48(%rsi,%r13,1),%xmm3
vinserti128 $1,(%r12),%ymm0,%ymm0
vinserti128 $1,16(%r12),%ymm1,%ymm1
vpshufb %ymm7,%ymm0,%ymm0
vinserti128 $1,32(%r12),%ymm2,%ymm2
vpshufb %ymm7,%ymm1,%ymm1
vinserti128 $1,48(%r12),%ymm3,%ymm3
/* %rbp walks the K256 constant table. */
leaq K256(%rip),%rbp
vpshufb %ymm7,%ymm2,%ymm2
/* Undo the +64 bias on %r13. */
leaq -64(%r13),%r13
vpaddd 0(%rbp),%ymm0,%ymm4
vpshufb %ymm7,%ymm3,%ymm3
vpaddd 32(%rbp),%ymm1,%ymm5
vpaddd 64(%rbp),%ymm2,%ymm6
vpaddd 96(%rbp),%ymm3,%ymm7
vmovdqa %ymm4,0(%rsp)
/* %r14d is added into the state by the first round; start it at zero. */
xorl %r14d,%r14d
vmovdqa %ymm5,32(%rsp)
/*
 * Reload the value spilled at 120(%rsp), move %rsp down by 64 and keep a
 * copy of that value at -8(%rsp) so the CFA stays recoverable:
 * CFA = *(%rsp - 8) + 8.
 */
movq 120(%rsp),%rsi
.cfi_def_cfa %rsi,8
leaq -64(%rsp),%rsp
movq %rsi,-8(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
/* %esi = %ebx ^ %ecx and %r12d = %r9d: inputs expected by the first round. */
movl %ebx,%esi
vmovdqa %ymm6,0(%rsp)
xorl %ecx,%esi
vmovdqa %ymm7,32(%rsp)
movl %r9d,%r12d
/* Skip the 128 bytes of constants already consumed above. */
subq $-32*4,%rbp
jmp .Lavx2_00_47
/*
 * .Lavx2_00_47: round loop with three interleaved instruction streams.
 *
 *  - Scalar stream: sixteen hash rounds per pass. Each round adds one 32-bit
 *    word read from the stack to one of the eight state registers
 *    (%r11d, %r10d, %r9d, %r8d, %edx, %ecx, %ebx, %eax in turn, so the
 *    register roles are back in place after eight rounds). Only offsets
 *    +0..+12 and +32..+44 of the frame at 128(%rsp) are read here, i.e. the
 *    low 128-bit lane of each 32-byte slot.
 *  - Vector stream: updates %ymm0..%ymm3 in turn, adds 32 bytes of constants
 *    from (%rbp) and stores the sums at 0(%rsp) / 32(%rsp). %rsp is moved
 *    down by 64 twice per pass, so each stored slot is read by the scalar
 *    stream two slides later.
 *  - AES stream: encrypts one 16-byte block per pass in CBC fashion
 *    (%xmm9 = data, %xmm10 = current round key, %xmm8 = previous result).
 *
 * The loop repeats while the byte at 3(%rbp) is non-zero after %rbp has been
 * advanced by 128. NOTE(review): the label suggests rounds 0..47 (three
 * passes); the terminating table contents are outside this excerpt.
 */
.align 16
.Lavx2_00_47:
/* Load the next 16-byte AES input block; park %r13 in the low qword of
 * %xmm15 because %r13d is used as scratch by the rounds below. */
vmovdqu (%r13),%xmm9
vpinsrq $0,%r13,%xmm15,%xmm15
/*
 * Slide the frame down by 64 bytes and carry the qword kept at -8(%rsp)
 * along with it: pushq re-reads the old copy and writes it just below the
 * new frame, then %rsp is bumped back by 8. The .cfi_escape lines track the
 * CFA expression through each intermediate %rsp value.
 */
leaq -64(%rsp),%rsp
.cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08
pushq 64-8(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08
leaq 8(%rsp),%rsp
.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
/* Four rounds on words at 128..140(%rsp); vector stream updates %ymm0. */
vpalignr $4,%ymm0,%ymm1,%ymm4
addl 0+128(%rsp),%r11d
andl %r8d,%r12d
rorxl $25,%r8d,%r13d
vpalignr $4,%ymm2,%ymm3,%ymm7
rorxl $11,%r8d,%r15d
leal (%rax,%r14,1),%eax
leal (%r11,%r12,1),%r11d
vpsrld $7,%ymm4,%ymm6
andnl %r10d,%r8d,%r12d
xorl %r15d,%r13d
rorxl $6,%r8d,%r14d
vpaddd %ymm7,%ymm0,%ymm0
leal (%r11,%r12,1),%r11d
xorl %r14d,%r13d
movl %eax,%r15d
vpsrld $3,%ymm4,%ymm7
rorxl $22,%eax,%r12d
leal (%r11,%r13,1),%r11d
xorl %ebx,%r15d
vpslld $14,%ymm4,%ymm5
rorxl $13,%eax,%r14d
rorxl $2,%eax,%r13d
leal (%rdx,%r11,1),%edx
vpxor %ymm6,%ymm7,%ymm4
andl %r15d,%esi
/* AES: xor the block with round key 0, then fetch the key at offset 16. */
vpxor %xmm10,%xmm9,%xmm9
vmovdqu 16-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %ebx,%esi
vpshufd $250,%ymm3,%ymm7
xorl %r13d,%r14d
leal (%r11,%rsi,1),%r11d
movl %r8d,%r12d
vpsrld $11,%ymm6,%ymm6
addl 4+128(%rsp),%r10d
andl %edx,%r12d
rorxl $25,%edx,%r13d
vpxor %ymm5,%ymm4,%ymm4
rorxl $11,%edx,%esi
leal (%r11,%r14,1),%r11d
leal (%r10,%r12,1),%r10d
vpslld $11,%ymm5,%ymm5
andnl %r9d,%edx,%r12d
xorl %esi,%r13d
rorxl $6,%edx,%r14d
vpxor %ymm6,%ymm4,%ymm4
leal (%r10,%r12,1),%r10d
xorl %r14d,%r13d
movl %r11d,%esi
vpsrld $10,%ymm7,%ymm6
rorxl $22,%r11d,%r12d
leal (%r10,%r13,1),%r10d
xorl %eax,%esi
vpxor %ymm5,%ymm4,%ymm4
rorxl $13,%r11d,%r14d
rorxl $2,%r11d,%r13d
leal (%rcx,%r10,1),%ecx
vpsrlq $17,%ymm7,%ymm7
andl %esi,%r15d
/* CBC chaining: xor in the previous result held in %xmm8. */
vpxor %xmm8,%xmm9,%xmm9
xorl %r12d,%r14d
xorl %eax,%r15d
vpaddd %ymm4,%ymm0,%ymm0
xorl %r13d,%r14d
leal (%r10,%r15,1),%r10d
movl %edx,%r12d
vpxor %ymm7,%ymm6,%ymm6
addl 8+128(%rsp),%r9d
andl %ecx,%r12d
rorxl $25,%ecx,%r13d
vpsrlq $2,%ymm7,%ymm7
rorxl $11,%ecx,%r15d
leal (%r10,%r14,1),%r10d
leal (%r9,%r12,1),%r9d
vpxor %ymm7,%ymm6,%ymm6
andnl %r8d,%ecx,%r12d
xorl %r15d,%r13d
rorxl $6,%ecx,%r14d
vpshufd $132,%ymm6,%ymm6
leal (%r9,%r12,1),%r9d
xorl %r14d,%r13d
movl %r10d,%r15d
vpsrldq $8,%ymm6,%ymm6
rorxl $22,%r10d,%r12d
leal (%r9,%r13,1),%r9d
xorl %r11d,%r15d
vpaddd %ymm6,%ymm0,%ymm0
rorxl $13,%r10d,%r14d
rorxl $2,%r10d,%r13d
leal (%rbx,%r9,1),%ebx
vpshufd $80,%ymm0,%ymm7
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 32-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r11d,%esi
vpsrld $10,%ymm7,%ymm6
xorl %r13d,%r14d
leal (%r9,%rsi,1),%r9d
movl %ecx,%r12d
vpsrlq $17,%ymm7,%ymm7
addl 12+128(%rsp),%r8d
andl %ebx,%r12d
rorxl $25,%ebx,%r13d
vpxor %ymm7,%ymm6,%ymm6
rorxl $11,%ebx,%esi
leal (%r9,%r14,1),%r9d
leal (%r8,%r12,1),%r8d
vpsrlq $2,%ymm7,%ymm7
andnl %edx,%ebx,%r12d
xorl %esi,%r13d
rorxl $6,%ebx,%r14d
vpxor %ymm7,%ymm6,%ymm6
leal (%r8,%r12,1),%r8d
xorl %r14d,%r13d
movl %r9d,%esi
vpshufd $232,%ymm6,%ymm6
rorxl $22,%r9d,%r12d
leal (%r8,%r13,1),%r8d
xorl %r10d,%esi
vpslldq $8,%ymm6,%ymm6
rorxl $13,%r9d,%r14d
rorxl $2,%r9d,%r13d
leal (%rax,%r8,1),%eax
vpaddd %ymm6,%ymm0,%ymm0
andl %esi,%r15d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 48-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r10d,%r15d
/* Updated %ymm0 plus constants at 0(%rbp) goes to slot 0 of this frame. */
vpaddd 0(%rbp),%ymm0,%ymm6
xorl %r13d,%r14d
leal (%r8,%r15,1),%r8d
movl %ebx,%r12d
vmovdqa %ymm6,0(%rsp)
/* Four rounds on words at 160..172(%rsp); vector stream updates %ymm1. */
vpalignr $4,%ymm1,%ymm2,%ymm4
addl 32+128(%rsp),%edx
andl %eax,%r12d
rorxl $25,%eax,%r13d
vpalignr $4,%ymm3,%ymm0,%ymm7
rorxl $11,%eax,%r15d
leal (%r8,%r14,1),%r8d
leal (%rdx,%r12,1),%edx
vpsrld $7,%ymm4,%ymm6
andnl %ecx,%eax,%r12d
xorl %r15d,%r13d
rorxl $6,%eax,%r14d
vpaddd %ymm7,%ymm1,%ymm1
leal (%rdx,%r12,1),%edx
xorl %r14d,%r13d
movl %r8d,%r15d
vpsrld $3,%ymm4,%ymm7
rorxl $22,%r8d,%r12d
leal (%rdx,%r13,1),%edx
xorl %r9d,%r15d
vpslld $14,%ymm4,%ymm5
rorxl $13,%r8d,%r14d
rorxl $2,%r8d,%r13d
leal (%r11,%rdx,1),%r11d
vpxor %ymm6,%ymm7,%ymm4
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 64-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r9d,%esi
vpshufd $250,%ymm0,%ymm7
xorl %r13d,%r14d
leal (%rdx,%rsi,1),%edx
movl %eax,%r12d
vpsrld $11,%ymm6,%ymm6
addl 36+128(%rsp),%ecx
andl %r11d,%r12d
rorxl $25,%r11d,%r13d
vpxor %ymm5,%ymm4,%ymm4
rorxl $11,%r11d,%esi
leal (%rdx,%r14,1),%edx
leal (%rcx,%r12,1),%ecx
vpslld $11,%ymm5,%ymm5
andnl %ebx,%r11d,%r12d
xorl %esi,%r13d
rorxl $6,%r11d,%r14d
vpxor %ymm6,%ymm4,%ymm4
leal (%rcx,%r12,1),%ecx
xorl %r14d,%r13d
movl %edx,%esi
vpsrld $10,%ymm7,%ymm6
rorxl $22,%edx,%r12d
leal (%rcx,%r13,1),%ecx
xorl %r8d,%esi
vpxor %ymm5,%ymm4,%ymm4
rorxl $13,%edx,%r14d
rorxl $2,%edx,%r13d
leal (%r10,%rcx,1),%r10d
vpsrlq $17,%ymm7,%ymm7
andl %esi,%r15d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 80-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r8d,%r15d
vpaddd %ymm4,%ymm1,%ymm1
xorl %r13d,%r14d
leal (%rcx,%r15,1),%ecx
movl %r11d,%r12d
vpxor %ymm7,%ymm6,%ymm6
addl 40+128(%rsp),%ebx
andl %r10d,%r12d
rorxl $25,%r10d,%r13d
vpsrlq $2,%ymm7,%ymm7
rorxl $11,%r10d,%r15d
leal (%rcx,%r14,1),%ecx
leal (%rbx,%r12,1),%ebx
vpxor %ymm7,%ymm6,%ymm6
andnl %eax,%r10d,%r12d
xorl %r15d,%r13d
rorxl $6,%r10d,%r14d
vpshufd $132,%ymm6,%ymm6
leal (%rbx,%r12,1),%ebx
xorl %r14d,%r13d
movl %ecx,%r15d
vpsrldq $8,%ymm6,%ymm6
rorxl $22,%ecx,%r12d
leal (%rbx,%r13,1),%ebx
xorl %edx,%r15d
vpaddd %ymm6,%ymm1,%ymm1
rorxl $13,%ecx,%r14d
rorxl $2,%ecx,%r13d
leal (%r9,%rbx,1),%r9d
vpshufd $80,%ymm1,%ymm7
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 96-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %edx,%esi
vpsrld $10,%ymm7,%ymm6
xorl %r13d,%r14d
leal (%rbx,%rsi,1),%ebx
movl %r10d,%r12d
vpsrlq $17,%ymm7,%ymm7
addl 44+128(%rsp),%eax
andl %r9d,%r12d
rorxl $25,%r9d,%r13d
vpxor %ymm7,%ymm6,%ymm6
rorxl $11,%r9d,%esi
leal (%rbx,%r14,1),%ebx
leal (%rax,%r12,1),%eax
vpsrlq $2,%ymm7,%ymm7
andnl %r11d,%r9d,%r12d
xorl %esi,%r13d
rorxl $6,%r9d,%r14d
vpxor %ymm7,%ymm6,%ymm6
leal (%rax,%r12,1),%eax
xorl %r14d,%r13d
movl %ebx,%esi
vpshufd $232,%ymm6,%ymm6
rorxl $22,%ebx,%r12d
leal (%rax,%r13,1),%eax
xorl %ecx,%esi
vpslldq $8,%ymm6,%ymm6
rorxl $13,%ebx,%r14d
rorxl $2,%ebx,%r13d
leal (%r8,%rax,1),%r8d
vpaddd %ymm6,%ymm1,%ymm1
andl %esi,%r15d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 112-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %ecx,%r15d
/* Updated %ymm1 plus constants at 32(%rbp) goes to slot 32 of this frame. */
vpaddd 32(%rbp),%ymm1,%ymm6
xorl %r13d,%r14d
leal (%rax,%r15,1),%eax
movl %r9d,%r12d
vmovdqa %ymm6,32(%rsp)
/* Second 64-byte frame slide of the pass (same sequence as at loop entry). */
leaq -64(%rsp),%rsp
.cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08
pushq 64-8(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08
leaq 8(%rsp),%rsp
.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
/* Four rounds on words at 128..140(%rsp); vector stream updates %ymm2. */
vpalignr $4,%ymm2,%ymm3,%ymm4
addl 0+128(%rsp),%r11d
andl %r8d,%r12d
rorxl $25,%r8d,%r13d
vpalignr $4,%ymm0,%ymm1,%ymm7
rorxl $11,%r8d,%r15d
leal (%rax,%r14,1),%eax
leal (%r11,%r12,1),%r11d
vpsrld $7,%ymm4,%ymm6
andnl %r10d,%r8d,%r12d
xorl %r15d,%r13d
rorxl $6,%r8d,%r14d
vpaddd %ymm7,%ymm2,%ymm2
leal (%r11,%r12,1),%r11d
xorl %r14d,%r13d
movl %eax,%r15d
vpsrld $3,%ymm4,%ymm7
rorxl $22,%eax,%r12d
leal (%r11,%r13,1),%r11d
xorl %ebx,%r15d
vpslld $14,%ymm4,%ymm5
rorxl $13,%eax,%r14d
rorxl $2,%eax,%r13d
leal (%rdx,%r11,1),%edx
vpxor %ymm6,%ymm7,%ymm4
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 128-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %ebx,%esi
vpshufd $250,%ymm1,%ymm7
xorl %r13d,%r14d
leal (%r11,%rsi,1),%r11d
movl %r8d,%r12d
vpsrld $11,%ymm6,%ymm6
addl 4+128(%rsp),%r10d
andl %edx,%r12d
rorxl $25,%edx,%r13d
vpxor %ymm5,%ymm4,%ymm4
rorxl $11,%edx,%esi
leal (%r11,%r14,1),%r11d
leal (%r10,%r12,1),%r10d
vpslld $11,%ymm5,%ymm5
andnl %r9d,%edx,%r12d
xorl %esi,%r13d
rorxl $6,%edx,%r14d
vpxor %ymm6,%ymm4,%ymm4
leal (%r10,%r12,1),%r10d
xorl %r14d,%r13d
movl %r11d,%esi
vpsrld $10,%ymm7,%ymm6
rorxl $22,%r11d,%r12d
leal (%r10,%r13,1),%r10d
xorl %eax,%esi
vpxor %ymm5,%ymm4,%ymm4
rorxl $13,%r11d,%r14d
rorxl $2,%r11d,%r13d
leal (%rcx,%r10,1),%ecx
vpsrlq $17,%ymm7,%ymm7
andl %esi,%r15d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 144-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %eax,%r15d
vpaddd %ymm4,%ymm2,%ymm2
xorl %r13d,%r14d
leal (%r10,%r15,1),%r10d
movl %edx,%r12d
vpxor %ymm7,%ymm6,%ymm6
addl 8+128(%rsp),%r9d
andl %ecx,%r12d
rorxl $25,%ecx,%r13d
vpsrlq $2,%ymm7,%ymm7
rorxl $11,%ecx,%r15d
leal (%r10,%r14,1),%r10d
leal (%r9,%r12,1),%r9d
vpxor %ymm7,%ymm6,%ymm6
andnl %r8d,%ecx,%r12d
xorl %r15d,%r13d
rorxl $6,%ecx,%r14d
vpshufd $132,%ymm6,%ymm6
leal (%r9,%r12,1),%r9d
xorl %r14d,%r13d
movl %r10d,%r15d
vpsrldq $8,%ymm6,%ymm6
rorxl $22,%r10d,%r12d
leal (%r9,%r13,1),%r9d
xorl %r11d,%r15d
vpaddd %ymm6,%ymm2,%ymm2
rorxl $13,%r10d,%r14d
rorxl $2,%r10d,%r13d
leal (%rbx,%r9,1),%ebx
vpshufd $80,%ymm2,%ymm7
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 160-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r11d,%esi
vpsrld $10,%ymm7,%ymm6
xorl %r13d,%r14d
leal (%r9,%rsi,1),%r9d
movl %ecx,%r12d
vpsrlq $17,%ymm7,%ymm7
addl 12+128(%rsp),%r8d
andl %ebx,%r12d
rorxl $25,%ebx,%r13d
vpxor %ymm7,%ymm6,%ymm6
rorxl $11,%ebx,%esi
leal (%r9,%r14,1),%r9d
leal (%r8,%r12,1),%r8d
vpsrlq $2,%ymm7,%ymm7
andnl %edx,%ebx,%r12d
xorl %esi,%r13d
rorxl $6,%ebx,%r14d
vpxor %ymm7,%ymm6,%ymm6
leal (%r8,%r12,1),%r8d
xorl %r14d,%r13d
movl %r9d,%esi
vpshufd $232,%ymm6,%ymm6
rorxl $22,%r9d,%r12d
leal (%r8,%r13,1),%r8d
xorl %r10d,%esi
vpslldq $8,%ymm6,%ymm6
rorxl $13,%r9d,%r14d
rorxl $2,%r9d,%r13d
leal (%rax,%r8,1),%eax
vpaddd %ymm6,%ymm2,%ymm2
andl %esi,%r15d
/*
 * First final-round candidate: vaesenclast with the key at offset 160 goes
 * to %xmm11, while %xmm9 continues with a regular round using the same key.
 */
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 176-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r10d,%r15d
/* Updated %ymm2 plus constants at 64(%rbp) goes to slot 0 of this frame. */
vpaddd 64(%rbp),%ymm2,%ymm6
xorl %r13d,%r14d
leal (%r8,%r15,1),%r8d
movl %ebx,%r12d
vmovdqa %ymm6,0(%rsp)
/* Four rounds on words at 160..172(%rsp); vector stream updates %ymm3. */
vpalignr $4,%ymm3,%ymm0,%ymm4
addl 32+128(%rsp),%edx
andl %eax,%r12d
rorxl $25,%eax,%r13d
vpalignr $4,%ymm1,%ymm2,%ymm7
rorxl $11,%eax,%r15d
leal (%r8,%r14,1),%r8d
leal (%rdx,%r12,1),%edx
vpsrld $7,%ymm4,%ymm6
andnl %ecx,%eax,%r12d
xorl %r15d,%r13d
rorxl $6,%eax,%r14d
vpaddd %ymm7,%ymm3,%ymm3
leal (%rdx,%r12,1),%edx
xorl %r14d,%r13d
movl %r8d,%r15d
vpsrld $3,%ymm4,%ymm7
rorxl $22,%r8d,%r12d
leal (%rdx,%r13,1),%edx
xorl %r9d,%r15d
vpslld $14,%ymm4,%ymm5
rorxl $13,%r8d,%r14d
rorxl $2,%r8d,%r13d
leal (%r11,%rdx,1),%r11d
vpxor %ymm6,%ymm7,%ymm4
andl %r15d,%esi
/* Keep the first candidate only where mask %xmm12 allows it. */
vpand %xmm12,%xmm11,%xmm8
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 192-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r9d,%esi
vpshufd $250,%ymm2,%ymm7
xorl %r13d,%r14d
leal (%rdx,%rsi,1),%edx
movl %eax,%r12d
vpsrld $11,%ymm6,%ymm6
addl 36+128(%rsp),%ecx
andl %r11d,%r12d
rorxl $25,%r11d,%r13d
vpxor %ymm5,%ymm4,%ymm4
rorxl $11,%r11d,%esi
leal (%rdx,%r14,1),%edx
leal (%rcx,%r12,1),%ecx
vpslld $11,%ymm5,%ymm5
andnl %ebx,%r11d,%r12d
xorl %esi,%r13d
rorxl $6,%r11d,%r14d
vpxor %ymm6,%ymm4,%ymm4
leal (%rcx,%r12,1),%ecx
xorl %r14d,%r13d
movl %edx,%esi
vpsrld $10,%ymm7,%ymm6
rorxl $22,%edx,%r12d
leal (%rcx,%r13,1),%ecx
xorl %r8d,%esi
vpxor %ymm5,%ymm4,%ymm4
rorxl $13,%edx,%r14d
rorxl $2,%edx,%r13d
leal (%r10,%rcx,1),%r10d
vpsrlq $17,%ymm7,%ymm7
andl %esi,%r15d
/* Second final-round candidate, using the key at offset 192. */
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 208-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r8d,%r15d
vpaddd %ymm4,%ymm3,%ymm3
xorl %r13d,%r14d
leal (%rcx,%r15,1),%ecx
movl %r11d,%r12d
vpxor %ymm7,%ymm6,%ymm6
addl 40+128(%rsp),%ebx
andl %r10d,%r12d
rorxl $25,%r10d,%r13d
vpsrlq $2,%ymm7,%ymm7
rorxl $11,%r10d,%r15d
leal (%rcx,%r14,1),%ecx
leal (%rbx,%r12,1),%ebx
vpxor %ymm7,%ymm6,%ymm6
andnl %eax,%r10d,%r12d
xorl %r15d,%r13d
rorxl $6,%r10d,%r14d
vpshufd $132,%ymm6,%ymm6
leal (%rbx,%r12,1),%ebx
xorl %r14d,%r13d
movl %ecx,%r15d
vpsrldq $8,%ymm6,%ymm6
rorxl $22,%ecx,%r12d
leal (%rbx,%r13,1),%ebx
xorl %edx,%r15d
vpaddd %ymm6,%ymm3,%ymm3
rorxl $13,%ecx,%r14d
rorxl $2,%ecx,%r13d
leal (%r9,%rbx,1),%r9d
vpshufd $80,%ymm3,%ymm7
andl %r15d,%esi
/* Mask the second candidate with %xmm13. */
vpand %xmm13,%xmm11,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 224-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %edx,%esi
vpsrld $10,%ymm7,%ymm6
xorl %r13d,%r14d
leal (%rbx,%rsi,1),%ebx
movl %r10d,%r12d
vpsrlq $17,%ymm7,%ymm7
addl 44+128(%rsp),%eax
andl %r9d,%r12d
rorxl $25,%r9d,%r13d
vpxor %ymm7,%ymm6,%ymm6
rorxl $11,%r9d,%esi
leal (%rbx,%r14,1),%ebx
leal (%rax,%r12,1),%eax
vpsrlq $2,%ymm7,%ymm7
andnl %r11d,%r9d,%r12d
xorl %esi,%r13d
rorxl $6,%r9d,%r14d
vpxor %ymm7,%ymm6,%ymm6
leal (%rax,%r12,1),%eax
xorl %r14d,%r13d
movl %ebx,%esi
vpshufd $232,%ymm6,%ymm6
rorxl $22,%ebx,%r12d
leal (%rax,%r13,1),%eax
xorl %ecx,%esi
vpslldq $8,%ymm6,%ymm6
rorxl $13,%ebx,%r14d
rorxl $2,%ebx,%r13d
leal (%r8,%rax,1),%r8d
vpaddd %ymm6,%ymm3,%ymm3
andl %esi,%r15d
/*
 * Merge the first two masked candidates, compute the third one with the key
 * at offset 224, and reload round key 0 for the next block.
 */
vpor %xmm11,%xmm8,%xmm8
vaesenclast %xmm10,%xmm9,%xmm11
vmovdqu 0-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %ecx,%r15d
/* Updated %ymm3 plus constants at 96(%rbp) goes to slot 32 of this frame. */
vpaddd 96(%rbp),%ymm3,%ymm6
xorl %r13d,%r14d
leal (%rax,%r15,1),%eax
movl %r9d,%r12d
vmovdqa %ymm6,32(%rsp)
/*
 * Recover the AES cursor (%r13) and the destination displacement (%r15)
 * from %xmm15, fold in the third masked candidate and store the 16-byte
 * result at cursor + displacement; it stays in %xmm8 for the next CBC xor.
 */
vmovq %xmm15,%r13
vpextrq $1,%xmm15,%r15
vpand %xmm14,%xmm11,%xmm11
vpor %xmm11,%xmm8,%xmm8
vmovdqu %xmm8,(%r15,%r13,1)
/* Advance the AES cursor by one block and the constant pointer by the
 * 128 bytes used in this pass; loop while byte 3 at the new %rbp is
 * non-zero. */
leaq 16(%r13),%r13
leaq 128(%rbp),%rbp
cmpb $0,3(%rbp)
jne .Lavx2_00_47
vmovdqu (%r13),%xmm9
vpinsrq $0,%r13,%xmm15,%xmm15
addl 0+64(%rsp),%r11d
andl %r8d,%r12d
rorxl $25,%r8d,%r13d
rorxl $11,%r8d,%r15d
leal (%rax,%r14,1),%eax
leal (%r11,%r12,1),%r11d
andnl %r10d,%r8d,%r12d
xorl %r15d,%r13d
rorxl $6,%r8d,%r14d
leal (%r11,%r12,1),%r11d
xorl %r14d,%r13d
movl %eax,%r15d
rorxl $22,%eax,%r12d
leal (%r11,%r13,1),%r11d
xorl %ebx,%r15d
rorxl $13,%eax,%r14d
rorxl $2,%eax,%r13d
leal (%rdx,%r11,1),%edx
andl %r15d,%esi
vpxor %xmm10,%xmm9,%xmm9
vmovdqu 16-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %ebx,%esi
xorl %r13d,%r14d
leal (%r11,%rsi,1),%r11d
movl %r8d,%r12d
addl 4+64(%rsp),%r10d
andl %edx,%r12d
rorxl $25,%edx,%r13d
rorxl $11,%edx,%esi
leal (%r11,%r14,1),%r11d
leal (%r10,%r12,1),%r10d
andnl %r9d,%edx,%r12d
xorl %esi,%r13d
rorxl $6,%edx,%r14d
leal (%r10,%r12,1),%r10d
xorl %r14d,%r13d
movl %r11d,%esi
rorxl $22,%r11d,%r12d
leal (%r10,%r13,1),%r10d
xorl %eax,%esi
rorxl $13,%r11d,%r14d
rorxl $2,%r11d,%r13d
leal (%rcx,%r10,1),%ecx
andl %esi,%r15d
vpxor %xmm8,%xmm9,%xmm9
xorl %r12d,%r14d
xorl %eax,%r15d
xorl %r13d,%r14d
leal (%r10,%r15,1),%r10d
movl %edx,%r12d
addl 8+64(%rsp),%r9d
andl %ecx,%r12d
rorxl $25,%ecx,%r13d
rorxl $11,%ecx,%r15d
leal (%r10,%r14,1),%r10d
leal (%r9,%r12,1),%r9d
andnl %r8d,%ecx,%r12d
xorl %r15d,%r13d
rorxl $6,%ecx,%r14d
leal (%r9,%r12,1),%r9d
xorl %r14d,%r13d
movl %r10d,%r15d
rorxl $22,%r10d,%r12d
leal (%r9,%r13,1),%r9d
xorl %r11d,%r15d
rorxl $13,%r10d,%r14d
rorxl $2,%r10d,%r13d
leal (%rbx,%r9,1),%ebx
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 32-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r11d,%esi
xorl %r13d,%r14d
leal (%r9,%rsi,1),%r9d
movl %ecx,%r12d
addl 12+64(%rsp),%r8d
andl %ebx,%r12d
rorxl $25,%ebx,%r13d
rorxl $11,%ebx,%esi
leal (%r9,%r14,1),%r9d
leal (%r8,%r12,1),%r8d
andnl %edx,%ebx,%r12d
xorl %esi,%r13d
rorxl $6,%ebx,%r14d
leal (%r8,%r12,1),%r8d
xorl %r14d,%r13d
movl %r9d,%esi
rorxl $22,%r9d,%r12d
leal (%r8,%r13,1),%r8d
xorl %r10d,%esi
rorxl $13,%r9d,%r14d
rorxl $2,%r9d,%r13d
leal (%rax,%r8,1),%eax
andl %esi,%r15d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 48-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r10d,%r15d
xorl %r13d,%r14d
leal (%r8,%r15,1),%r8d
movl %ebx,%r12d
addl 32+64(%rsp),%edx
andl %eax,%r12d
rorxl $25,%eax,%r13d
rorxl $11,%eax,%r15d
leal (%r8,%r14,1),%r8d
leal (%rdx,%r12,1),%edx
andnl %ecx,%eax,%r12d
xorl %r15d,%r13d
rorxl $6,%eax,%r14d
leal (%rdx,%r12,1),%edx
xorl %r14d,%r13d
movl %r8d,%r15d
rorxl $22,%r8d,%r12d
leal (%rdx,%r13,1),%edx
xorl %r9d,%r15d
rorxl $13,%r8d,%r14d
rorxl $2,%r8d,%r13d
leal (%r11,%rdx,1),%r11d
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 64-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r9d,%esi
xorl %r13d,%r14d
leal (%rdx,%rsi,1),%edx
movl %eax,%r12d
addl 36+64(%rsp),%ecx
andl %r11d,%r12d
rorxl $25,%r11d,%r13d
rorxl $11,%r11d,%esi
leal (%rdx,%r14,1),%edx
leal (%rcx,%r12,1),%ecx
andnl %ebx,%r11d,%r12d
xorl %esi,%r13d
rorxl $6,%r11d,%r14d
leal (%rcx,%r12,1),%ecx
xorl %r14d,%r13d
movl %edx,%esi
rorxl $22,%edx,%r12d
leal (%rcx,%r13,1),%ecx
xorl %r8d,%esi
rorxl $13,%edx,%r14d
rorxl $2,%edx,%r13d
leal (%r10,%rcx,1),%r10d
andl %esi,%r15d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 80-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r8d,%r15d
xorl %r13d,%r14d
leal (%rcx,%r15,1),%ecx
movl %r11d,%r12d
addl 40+64(%rsp),%ebx
andl %r10d,%r12d
rorxl $25,%r10d,%r13d
rorxl $11,%r10d,%r15d
leal (%rcx,%r14,1),%ecx
leal (%rbx,%r12,1),%ebx
andnl %eax,%r10d,%r12d
xorl %r15d,%r13d
rorxl $6,%r10d,%r14d
leal (%rbx,%r12,1),%ebx
xorl %r14d,%r13d
movl %ecx,%r15d
rorxl $22,%ecx,%r12d
leal (%rbx,%r13,1),%ebx
xorl %edx,%r15d
rorxl $13,%ecx,%r14d
rorxl $2,%ecx,%r13d
leal (%r9,%rbx,1),%r9d
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 96-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %edx,%esi
xorl %r13d,%r14d
leal (%rbx,%rsi,1),%ebx
movl %r10d,%r12d
addl 44+64(%rsp),%eax
andl %r9d,%r12d
rorxl $25,%r9d,%r13d
rorxl $11,%r9d,%esi
leal (%rbx,%r14,1),%ebx
leal (%rax,%r12,1),%eax
andnl %r11d,%r9d,%r12d
xorl %esi,%r13d
rorxl $6,%r9d,%r14d
leal (%rax,%r12,1),%eax
xorl %r14d,%r13d
movl %ebx,%esi
rorxl $22,%ebx,%r12d
leal (%rax,%r13,1),%eax
xorl %ecx,%esi
rorxl $13,%ebx,%r14d
rorxl $2,%ebx,%r13d
leal (%r8,%rax,1),%r8d
andl %esi,%r15d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 112-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %ecx,%r15d
xorl %r13d,%r14d
leal (%rax,%r15,1),%eax
movl %r9d,%r12d
addl 0(%rsp),%r11d
andl %r8d,%r12d
rorxl $25,%r8d,%r13d
rorxl $11,%r8d,%r15d
leal (%rax,%r14,1),%eax
leal (%r11,%r12,1),%r11d
andnl %r10d,%r8d,%r12d
xorl %r15d,%r13d
rorxl $6,%r8d,%r14d
leal (%r11,%r12,1),%r11d
xorl %r14d,%r13d
movl %eax,%r15d
rorxl $22,%eax,%r12d
leal (%r11,%r13,1),%r11d
xorl %ebx,%r15d
rorxl $13,%eax,%r14d
rorxl $2,%eax,%r13d
leal (%rdx,%r11,1),%edx
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 128-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %ebx,%esi
xorl %r13d,%r14d
leal (%r11,%rsi,1),%r11d
movl %r8d,%r12d
addl 4(%rsp),%r10d
andl %edx,%r12d
rorxl $25,%edx,%r13d
rorxl $11,%edx,%esi
leal (%r11,%r14,1),%r11d
leal (%r10,%r12,1),%r10d
andnl %r9d,%edx,%r12d
xorl %esi,%r13d
rorxl $6,%edx,%r14d
leal (%r10,%r12,1),%r10d
xorl %r14d,%r13d
movl %r11d,%esi
rorxl $22,%r11d,%r12d
leal (%r10,%r13,1),%r10d
xorl %eax,%esi
rorxl $13,%r11d,%r14d
rorxl $2,%r11d,%r13d
leal (%rcx,%r10,1),%ecx
andl %esi,%r15d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 144-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %eax,%r15d
xorl %r13d,%r14d
leal (%r10,%r15,1),%r10d
movl %edx,%r12d
addl 8(%rsp),%r9d
andl %ecx,%r12d
rorxl $25,%ecx,%r13d
rorxl $11,%ecx,%r15d
leal (%r10,%r14,1),%r10d
leal (%r9,%r12,1),%r9d
andnl %r8d,%ecx,%r12d
xorl %r15d,%r13d
rorxl $6,%ecx,%r14d
leal (%r9,%r12,1),%r9d
xorl %r14d,%r13d
movl %r10d,%r15d
rorxl $22,%r10d,%r12d
leal (%r9,%r13,1),%r9d
xorl %r11d,%r15d
rorxl $13,%r10d,%r14d
rorxl $2,%r10d,%r13d
leal (%rbx,%r9,1),%ebx
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 160-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r11d,%esi
xorl %r13d,%r14d
leal (%r9,%rsi,1),%r9d
movl %ecx,%r12d
addl 12(%rsp),%r8d
andl %ebx,%r12d
rorxl $25,%ebx,%r13d
rorxl $11,%ebx,%esi
leal (%r9,%r14,1),%r9d
leal (%r8,%r12,1),%r8d
andnl %edx,%ebx,%r12d
xorl %esi,%r13d
rorxl $6,%ebx,%r14d
leal (%r8,%r12,1),%r8d
xorl %r14d,%r13d
movl %r9d,%esi
rorxl $22,%r9d,%r12d
leal (%r8,%r13,1),%r8d
xorl %r10d,%esi
rorxl $13,%r9d,%r14d
rorxl $2,%r9d,%r13d
leal (%rax,%r8,1),%eax
andl %esi,%r15d
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 176-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r10d,%r15d
xorl %r13d,%r14d
leal (%r8,%r15,1),%r8d
movl %ebx,%r12d
addl 32(%rsp),%edx
andl %eax,%r12d
rorxl $25,%eax,%r13d
rorxl $11,%eax,%r15d
leal (%r8,%r14,1),%r8d
leal (%rdx,%r12,1),%edx
andnl %ecx,%eax,%r12d
xorl %r15d,%r13d
rorxl $6,%eax,%r14d
leal (%rdx,%r12,1),%edx
xorl %r14d,%r13d
movl %r8d,%r15d
rorxl $22,%r8d,%r12d
leal (%rdx,%r13,1),%edx
xorl %r9d,%r15d
rorxl $13,%r8d,%r14d
rorxl $2,%r8d,%r13d
leal (%r11,%rdx,1),%r11d
andl %r15d,%esi
vpand %xmm12,%xmm11,%xmm8
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 192-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r9d,%esi
xorl %r13d,%r14d
leal (%rdx,%rsi,1),%edx
movl %eax,%r12d
addl 36(%rsp),%ecx
andl %r11d,%r12d
rorxl $25,%r11d,%r13d
rorxl $11,%r11d,%esi
leal (%rdx,%r14,1),%edx
leal (%rcx,%r12,1),%ecx
andnl %ebx,%r11d,%r12d
xorl %esi,%r13d
rorxl $6,%r11d,%r14d
leal (%rcx,%r12,1),%ecx
xorl %r14d,%r13d
movl %edx,%esi
rorxl $22,%edx,%r12d
leal (%rcx,%r13,1),%ecx
xorl %r8d,%esi
rorxl $13,%edx,%r14d
rorxl $2,%edx,%r13d
leal (%r10,%rcx,1),%r10d
andl %esi,%r15d
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 208-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r8d,%r15d
xorl %r13d,%r14d
leal (%rcx,%r15,1),%ecx
movl %r11d,%r12d
addl 40(%rsp),%ebx
andl %r10d,%r12d
rorxl $25,%r10d,%r13d
rorxl $11,%r10d,%r15d
leal (%rcx,%r14,1),%ecx
leal (%rbx,%r12,1),%ebx
andnl %eax,%r10d,%r12d
xorl %r15d,%r13d
rorxl $6,%r10d,%r14d
leal (%rbx,%r12,1),%ebx
xorl %r14d,%r13d
movl %ecx,%r15d
rorxl $22,%ecx,%r12d
leal (%rbx,%r13,1),%ebx
xorl %edx,%r15d
rorxl $13,%ecx,%r14d
rorxl $2,%ecx,%r13d
leal (%r9,%rbx,1),%r9d
andl %r15d,%esi
vpand %xmm13,%xmm11,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 224-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %edx,%esi
xorl %r13d,%r14d
leal (%rbx,%rsi,1),%ebx
movl %r10d,%r12d
addl 44(%rsp),%eax
andl %r9d,%r12d
rorxl $25,%r9d,%r13d
rorxl $11,%r9d,%esi
leal (%rbx,%r14,1),%ebx
leal (%rax,%r12,1),%eax
andnl %r11d,%r9d,%r12d
xorl %esi,%r13d
rorxl $6,%r9d,%r14d
leal (%rax,%r12,1),%eax
xorl %r14d,%r13d
movl %ebx,%esi
rorxl $22,%ebx,%r12d
leal (%rax,%r13,1),%eax
xorl %ecx,%esi
rorxl $13,%ebx,%r14d
rorxl $2,%ebx,%r13d
leal (%r8,%rax,1),%r8d
andl %esi,%r15d
vpor %xmm11,%xmm8,%xmm8
vaesenclast %xmm10,%xmm9,%xmm11
vmovdqu 0-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %ecx,%r15d
xorl %r13d,%r14d
leal (%rax,%r15,1),%eax
movl %r9d,%r12d
vpextrq $1,%xmm15,%r12
vmovq %xmm15,%r13
movq 552(%rsp),%r15
addl %r14d,%eax
leaq 448(%rsp),%rbp
vpand %xmm14,%xmm11,%xmm11
vpor %xmm11,%xmm8,%xmm8
vmovdqu %xmm8,(%r12,%r13,1)
leaq 16(%r13),%r13
addl 0(%r15),%eax
addl 4(%r15),%ebx
addl 8(%r15),%ecx
addl 12(%r15),%edx
addl 16(%r15),%r8d
addl 20(%r15),%r9d
addl 24(%r15),%r10d
addl 28(%r15),%r11d
movl %eax,0(%r15)
movl %ebx,4(%r15)
movl %ecx,8(%r15)
movl %edx,12(%r15)
movl %r8d,16(%r15)
movl %r9d,20(%r15)
movl %r10d,24(%r15)
movl %r11d,28(%r15)
cmpq 80(%rbp),%r13
je .Ldone_avx2
xorl %r14d,%r14d
movl %ebx,%esi
movl %r9d,%r12d
xorl %ecx,%esi
jmp .Lower_avx2
.align 16
.Lower_avx2:
vmovdqu (%r13),%xmm9
vpinsrq $0,%r13,%xmm15,%xmm15
addl 0+16(%rbp),%r11d
andl %r8d,%r12d
rorxl $25,%r8d,%r13d
rorxl $11,%r8d,%r15d
leal (%rax,%r14,1),%eax
leal (%r11,%r12,1),%r11d
andnl %r10d,%r8d,%r12d
xorl %r15d,%r13d
rorxl $6,%r8d,%r14d
leal (%r11,%r12,1),%r11d
xorl %r14d,%r13d
movl %eax,%r15d
rorxl $22,%eax,%r12d
leal (%r11,%r13,1),%r11d
xorl %ebx,%r15d
rorxl $13,%eax,%r14d
rorxl $2,%eax,%r13d
leal (%rdx,%r11,1),%edx
andl %r15d,%esi
vpxor %xmm10,%xmm9,%xmm9
vmovdqu 16-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %ebx,%esi
xorl %r13d,%r14d
leal (%r11,%rsi,1),%r11d
movl %r8d,%r12d
addl 4+16(%rbp),%r10d
andl %edx,%r12d
rorxl $25,%edx,%r13d
rorxl $11,%edx,%esi
leal (%r11,%r14,1),%r11d
leal (%r10,%r12,1),%r10d
andnl %r9d,%edx,%r12d
xorl %esi,%r13d
rorxl $6,%edx,%r14d
leal (%r10,%r12,1),%r10d
xorl %r14d,%r13d
movl %r11d,%esi
rorxl $22,%r11d,%r12d
leal (%r10,%r13,1),%r10d
xorl %eax,%esi
rorxl $13,%r11d,%r14d
rorxl $2,%r11d,%r13d
leal (%rcx,%r10,1),%ecx
andl %esi,%r15d
vpxor %xmm8,%xmm9,%xmm9
xorl %r12d,%r14d
xorl %eax,%r15d
xorl %r13d,%r14d
leal (%r10,%r15,1),%r10d
movl %edx,%r12d
addl 8+16(%rbp),%r9d
andl %ecx,%r12d
rorxl $25,%ecx,%r13d
rorxl $11,%ecx,%r15d
leal (%r10,%r14,1),%r10d
leal (%r9,%r12,1),%r9d
andnl %r8d,%ecx,%r12d
xorl %r15d,%r13d
rorxl $6,%ecx,%r14d
leal (%r9,%r12,1),%r9d
xorl %r14d,%r13d
movl %r10d,%r15d
rorxl $22,%r10d,%r12d
leal (%r9,%r13,1),%r9d
xorl %r11d,%r15d
rorxl $13,%r10d,%r14d
rorxl $2,%r10d,%r13d
leal (%rbx,%r9,1),%ebx
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 32-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r11d,%esi
xorl %r13d,%r14d
leal (%r9,%rsi,1),%r9d
movl %ecx,%r12d
addl 12+16(%rbp),%r8d
andl %ebx,%r12d
rorxl $25,%ebx,%r13d
rorxl $11,%ebx,%esi
leal (%r9,%r14,1),%r9d
leal (%r8,%r12,1),%r8d
andnl %edx,%ebx,%r12d
xorl %esi,%r13d
rorxl $6,%ebx,%r14d
leal (%r8,%r12,1),%r8d
xorl %r14d,%r13d
movl %r9d,%esi
rorxl $22,%r9d,%r12d
leal (%r8,%r13,1),%r8d
xorl %r10d,%esi
rorxl $13,%r9d,%r14d
rorxl $2,%r9d,%r13d
leal (%rax,%r8,1),%eax
andl %esi,%r15d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 48-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r10d,%r15d
xorl %r13d,%r14d
leal (%r8,%r15,1),%r8d
movl %ebx,%r12d
addl 32+16(%rbp),%edx
andl %eax,%r12d
rorxl $25,%eax,%r13d
rorxl $11,%eax,%r15d
leal (%r8,%r14,1),%r8d
leal (%rdx,%r12,1),%edx
andnl %ecx,%eax,%r12d
xorl %r15d,%r13d
rorxl $6,%eax,%r14d
leal (%rdx,%r12,1),%edx
xorl %r14d,%r13d
movl %r8d,%r15d
rorxl $22,%r8d,%r12d
leal (%rdx,%r13,1),%edx
xorl %r9d,%r15d
rorxl $13,%r8d,%r14d
rorxl $2,%r8d,%r13d
leal (%r11,%rdx,1),%r11d
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 64-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r9d,%esi
xorl %r13d,%r14d
leal (%rdx,%rsi,1),%edx
movl %eax,%r12d
addl 36+16(%rbp),%ecx
andl %r11d,%r12d
rorxl $25,%r11d,%r13d
rorxl $11,%r11d,%esi
leal (%rdx,%r14,1),%edx
leal (%rcx,%r12,1),%ecx
andnl %ebx,%r11d,%r12d
xorl %esi,%r13d
rorxl $6,%r11d,%r14d
leal (%rcx,%r12,1),%ecx
xorl %r14d,%r13d
movl %edx,%esi
rorxl $22,%edx,%r12d
leal (%rcx,%r13,1),%ecx
xorl %r8d,%esi
rorxl $13,%edx,%r14d
rorxl $2,%edx,%r13d
leal (%r10,%rcx,1),%r10d
andl %esi,%r15d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 80-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r8d,%r15d
xorl %r13d,%r14d
leal (%rcx,%r15,1),%ecx
movl %r11d,%r12d
addl 40+16(%rbp),%ebx
andl %r10d,%r12d
rorxl $25,%r10d,%r13d
rorxl $11,%r10d,%r15d
leal (%rcx,%r14,1),%ecx
leal (%rbx,%r12,1),%ebx
andnl %eax,%r10d,%r12d
xorl %r15d,%r13d
rorxl $6,%r10d,%r14d
leal (%rbx,%r12,1),%ebx
xorl %r14d,%r13d
movl %ecx,%r15d
rorxl $22,%ecx,%r12d
leal (%rbx,%r13,1),%ebx
xorl %edx,%r15d
rorxl $13,%ecx,%r14d
rorxl $2,%ecx,%r13d
leal (%r9,%rbx,1),%r9d
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 96-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %edx,%esi
xorl %r13d,%r14d
leal (%rbx,%rsi,1),%ebx
movl %r10d,%r12d
addl 44+16(%rbp),%eax
andl %r9d,%r12d
rorxl $25,%r9d,%r13d
rorxl $11,%r9d,%esi
leal (%rbx,%r14,1),%ebx
leal (%rax,%r12,1),%eax
andnl %r11d,%r9d,%r12d
xorl %esi,%r13d
rorxl $6,%r9d,%r14d
leal (%rax,%r12,1),%eax
xorl %r14d,%r13d
movl %ebx,%esi
rorxl $22,%ebx,%r12d
leal (%rax,%r13,1),%eax
xorl %ecx,%esi
rorxl $13,%ebx,%r14d
rorxl $2,%ebx,%r13d
leal (%r8,%rax,1),%r8d
andl %esi,%r15d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 112-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %ecx,%r15d
xorl %r13d,%r14d
leal (%rax,%r15,1),%eax
movl %r9d,%r12d
leaq -64(%rbp),%rbp
addl 0+16(%rbp),%r11d
andl %r8d,%r12d
rorxl $25,%r8d,%r13d
rorxl $11,%r8d,%r15d
leal (%rax,%r14,1),%eax
leal (%r11,%r12,1),%r11d
andnl %r10d,%r8d,%r12d
xorl %r15d,%r13d
rorxl $6,%r8d,%r14d
leal (%r11,%r12,1),%r11d
xorl %r14d,%r13d
movl %eax,%r15d
rorxl $22,%eax,%r12d
leal (%r11,%r13,1),%r11d
xorl %ebx,%r15d
rorxl $13,%eax,%r14d
rorxl $2,%eax,%r13d
leal (%rdx,%r11,1),%edx
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 128-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %ebx,%esi
xorl %r13d,%r14d
leal (%r11,%rsi,1),%r11d
movl %r8d,%r12d
addl 4+16(%rbp),%r10d
andl %edx,%r12d
rorxl $25,%edx,%r13d
rorxl $11,%edx,%esi
leal (%r11,%r14,1),%r11d
leal (%r10,%r12,1),%r10d
andnl %r9d,%edx,%r12d
xorl %esi,%r13d
rorxl $6,%edx,%r14d
leal (%r10,%r12,1),%r10d
xorl %r14d,%r13d
movl %r11d,%esi
rorxl $22,%r11d,%r12d
leal (%r10,%r13,1),%r10d
xorl %eax,%esi
rorxl $13,%r11d,%r14d
rorxl $2,%r11d,%r13d
leal (%rcx,%r10,1),%ecx
andl %esi,%r15d
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 144-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %eax,%r15d
xorl %r13d,%r14d
leal (%r10,%r15,1),%r10d
movl %edx,%r12d
addl 8+16(%rbp),%r9d
andl %ecx,%r12d
rorxl $25,%ecx,%r13d
rorxl $11,%ecx,%r15d
leal (%r10,%r14,1),%r10d
leal (%r9,%r12,1),%r9d
andnl %r8d,%ecx,%r12d
xorl %r15d,%r13d
rorxl $6,%ecx,%r14d
leal (%r9,%r12,1),%r9d
xorl %r14d,%r13d
movl %r10d,%r15d
rorxl $22,%r10d,%r12d
leal (%r9,%r13,1),%r9d
xorl %r11d,%r15d
rorxl $13,%r10d,%r14d
rorxl $2,%r10d,%r13d
leal (%rbx,%r9,1),%ebx
andl %r15d,%esi
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 160-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r11d,%esi
xorl %r13d,%r14d
leal (%r9,%rsi,1),%r9d
movl %ecx,%r12d
addl 12+16(%rbp),%r8d
andl %ebx,%r12d
rorxl $25,%ebx,%r13d
rorxl $11,%ebx,%esi
leal (%r9,%r14,1),%r9d
leal (%r8,%r12,1),%r8d
andnl %edx,%ebx,%r12d
xorl %esi,%r13d
rorxl $6,%ebx,%r14d
leal (%r8,%r12,1),%r8d
xorl %r14d,%r13d
movl %r9d,%esi
rorxl $22,%r9d,%r12d
leal (%r8,%r13,1),%r8d
xorl %r10d,%esi
rorxl $13,%r9d,%r14d
rorxl $2,%r9d,%r13d
leal (%rax,%r8,1),%eax
andl %esi,%r15d
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 176-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r10d,%r15d
xorl %r13d,%r14d
leal (%r8,%r15,1),%r8d
movl %ebx,%r12d
addl 32+16(%rbp),%edx
andl %eax,%r12d
rorxl $25,%eax,%r13d
rorxl $11,%eax,%r15d
leal (%r8,%r14,1),%r8d
leal (%rdx,%r12,1),%edx
andnl %ecx,%eax,%r12d
xorl %r15d,%r13d
rorxl $6,%eax,%r14d
leal (%rdx,%r12,1),%edx
xorl %r14d,%r13d
movl %r8d,%r15d
rorxl $22,%r8d,%r12d
leal (%rdx,%r13,1),%edx
xorl %r9d,%r15d
rorxl $13,%r8d,%r14d
rorxl $2,%r8d,%r13d
leal (%r11,%rdx,1),%r11d
andl %r15d,%esi
vpand %xmm12,%xmm11,%xmm8
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 192-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r9d,%esi
xorl %r13d,%r14d
leal (%rdx,%rsi,1),%edx
movl %eax,%r12d
addl 36+16(%rbp),%ecx
andl %r11d,%r12d
rorxl $25,%r11d,%r13d
rorxl $11,%r11d,%esi
leal (%rdx,%r14,1),%edx
leal (%rcx,%r12,1),%ecx
andnl %ebx,%r11d,%r12d
xorl %esi,%r13d
rorxl $6,%r11d,%r14d
leal (%rcx,%r12,1),%ecx
xorl %r14d,%r13d
movl %edx,%esi
rorxl $22,%edx,%r12d
leal (%rcx,%r13,1),%ecx
xorl %r8d,%esi
rorxl $13,%edx,%r14d
rorxl $2,%edx,%r13d
leal (%r10,%rcx,1),%r10d
andl %esi,%r15d
vaesenclast %xmm10,%xmm9,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 208-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %r8d,%r15d
xorl %r13d,%r14d
leal (%rcx,%r15,1),%ecx
movl %r11d,%r12d
addl 40+16(%rbp),%ebx
andl %r10d,%r12d
rorxl $25,%r10d,%r13d
rorxl $11,%r10d,%r15d
leal (%rcx,%r14,1),%ecx
leal (%rbx,%r12,1),%ebx
andnl %eax,%r10d,%r12d
xorl %r15d,%r13d
rorxl $6,%r10d,%r14d
leal (%rbx,%r12,1),%ebx
xorl %r14d,%r13d
movl %ecx,%r15d
rorxl $22,%ecx,%r12d
leal (%rbx,%r13,1),%ebx
xorl %edx,%r15d
rorxl $13,%ecx,%r14d
rorxl $2,%ecx,%r13d
leal (%r9,%rbx,1),%r9d
andl %r15d,%esi
vpand %xmm13,%xmm11,%xmm11
vaesenc %xmm10,%xmm9,%xmm9
vmovdqu 224-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %edx,%esi
xorl %r13d,%r14d
leal (%rbx,%rsi,1),%ebx
movl %r10d,%r12d
addl 44+16(%rbp),%eax
andl %r9d,%r12d
rorxl $25,%r9d,%r13d
rorxl $11,%r9d,%esi
leal (%rbx,%r14,1),%ebx
leal (%rax,%r12,1),%eax
andnl %r11d,%r9d,%r12d
xorl %esi,%r13d
rorxl $6,%r9d,%r14d
leal (%rax,%r12,1),%eax
xorl %r14d,%r13d
movl %ebx,%esi
rorxl $22,%ebx,%r12d
leal (%rax,%r13,1),%eax
xorl %ecx,%esi
rorxl $13,%ebx,%r14d
rorxl $2,%ebx,%r13d
leal (%r8,%rax,1),%r8d
andl %esi,%r15d
vpor %xmm11,%xmm8,%xmm8
vaesenclast %xmm10,%xmm9,%xmm11
vmovdqu 0-128(%rdi),%xmm10
xorl %r12d,%r14d
xorl %ecx,%r15d
xorl %r13d,%r14d
leal (%rax,%r15,1),%eax
movl %r9d,%r12d
vmovq %xmm15,%r13
vpextrq $1,%xmm15,%r15
vpand %xmm14,%xmm11,%xmm11
vpor %xmm11,%xmm8,%xmm8
leaq -64(%rbp),%rbp
vmovdqu %xmm8,(%r15,%r13,1)
leaq 16(%r13),%r13
cmpq %rsp,%rbp
jae .Lower_avx2
movq 552(%rsp),%r15
leaq 64(%r13),%r13
movq 560(%rsp),%rsi
addl %r14d,%eax
leaq 448(%rsp),%rsp
addl 0(%r15),%eax
addl 4(%r15),%ebx
addl 8(%r15),%ecx
addl 12(%r15),%edx
addl 16(%r15),%r8d
addl 20(%r15),%r9d
addl 24(%r15),%r10d
leaq (%rsi,%r13,1),%r12
addl 28(%r15),%r11d
cmpq 64+16(%rsp),%r13
movl %eax,0(%r15)
cmoveq %rsp,%r12
movl %ebx,4(%r15)
movl %ecx,8(%r15)
movl %edx,12(%r15)
movl %r8d,16(%r15)
movl %r9d,20(%r15)
movl %r10d,24(%r15)
movl %r11d,28(%r15)
jbe .Loop_avx2
leaq (%rsp),%rbp
.cfi_escape 0x0f,0x06,0x76,0xf8,0x00,0x06,0x23,0x08
.Ldone_avx2:
movq 64+32(%rbp),%r8
movq 64+56(%rbp),%rsi
.cfi_def_cfa %rsi,8
vmovdqu %xmm8,(%r8)
vzeroall
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_cbc_sha256_enc_avx2,.-aesni_cbc_sha256_enc_avx2
/*
 * aesni_cbc_sha256_enc_shaext
 *
 * Interleaved AES-CBC encryption and SHA-256 hashing built on AES-NI and
 * the SHA extensions.  Every pass of .Loop_shaext encrypts four 16-byte
 * AES blocks and runs the 64 SHA-256 rounds for one 64-byte hash block.
 *
 * Arguments, as used by the code below:
 *   %rdi     AES input pointer, advanced by 64 per pass
 *   %rsi     AES output pointer; converted to (out - in) so stores are
 *            addressed as (%rsi,%rdi,1)
 *   %rdx     number of 64-byte passes, decremented to zero
 *   %rcx     AES key schedule; the 32-bit value at offset 240 selects how
 *            many extra rounds run (presumably the rounds field of the
 *            key structure - confirm against the key-setup code)
 *   %r8      16-byte CBC chaining value: read on entry, rewritten on exit
 *   %r9      32-byte SHA-256 state: read on entry, rewritten on exit
 *   8(%rsp)  pointer to the data being hashed (first stack argument),
 *            held in %r10 and advanced by 64 per pass
 *
 * Register roles inside the loop:
 *   %rax         K256+128, base for the round-constant loads
 *   %r11d        value loaded from 240(%rcx)
 *   %xmm0        constants + message words, implicit sha256rnds2 operand
 *   %xmm1,%xmm2  SHA-256 working state (shuffled layout, see prologue)
 *   %xmm3        shuffle mask for pshufb, later reused as a temporary
 *   %xmm7        preserved copy of the shuffle mask
 *   %xmm4,%xmm5  alternating AES round keys
 *   %xmm6        AES block being encrypted / previous ciphertext block
 *   %xmm8,%xmm9  SHA-256 state saved at the top of the pass
 *   %xmm10-13    message schedule words
 *   %xmm14       input block xor round key 0
 *   %xmm15       round key 0
 *
 * The function uses no stack space and touches only %rax, %rcx, %rdx,
 * %rsi, %rdi, %r8-%r11 and %xmm0-%xmm15.
 *
 * Instructions emitted as raw bytes (decoded from the encodings):
 *   .byte 15,56,203,209            sha256rnds2 %xmm0,%xmm1,%xmm2
 *   .byte 15,56,203,202            sha256rnds2 %xmm0,%xmm2,%xmm1
 *   .byte 69,15,56,204,MODRM       sha256msg1  (211: %xmm11,%xmm10
 *                                  220: %xmm12,%xmm11  229: %xmm13,%xmm12
 *                                  234: %xmm10,%xmm13)
 *   .byte 69,15,56,205,MODRM       sha256msg2  (213: %xmm13,%xmm10
 *                                  218: %xmm10,%xmm11  227: %xmm11,%xmm12
 *                                  236: %xmm12,%xmm13)
 *   .byte 102,68,15,56,0,MODRM     pshufb %xmm3,dst (211: %xmm10
 *                                  219: %xmm11  227: %xmm12  235: %xmm13)
 *   .byte 102,65,15,58,15,MODRM,4  palignr $4,src,%xmm3 (220: %xmm12
 *                                  221: %xmm13  218: %xmm10  219: %xmm11)
 *   .byte 102,15,58,15,202,8       palignr $8,%xmm2,%xmm1
 *   .byte 102,15,58,15,211,8       palignr $8,%xmm3,%xmm2
 *   .byte 0xf3,0xc3                rep ret
 *
 * NOTE(review): the loop is entered unconditionally and only tests %rdx
 * after decrementing it, so a count of zero is not handled here; callers
 * presumably guarantee at least one block - confirm.
 */
.type aesni_cbc_sha256_enc_shaext,@function
.align 32
aesni_cbc_sha256_enc_shaext:
.cfi_startproc
/* Fetch the stack argument and set up the constant-table base. */
movq 8(%rsp),%r10
leaq K256+128(%rip),%rax
/* Load the 32-byte hash state. */
movdqu (%r9),%xmm1
movdqu 16(%r9),%xmm2
/*
 * K256+512: first 16 bytes past the duplicated round constants; used as
 * the pshufb mask below (presumably a big-endian byte-swap mask - the
 * table contents at that offset are not visible here).
 */
movdqa 512-128(%rax),%xmm3
movl 240(%rcx),%r11d
/* %rsi becomes the out-in distance so one pointer walks both buffers. */
subq %rdi,%rsi
/* Round key 0, the chaining value and round key 1. */
movups (%rcx),%xmm15
movups (%r8),%xmm6
movups 16(%rcx),%xmm4
/* Bias the key pointer: round keys 2..14 are now at -80..+112(%rcx). */
leaq 112(%rcx),%rcx
/*
 * Rearrange the two state halves into the register layout consumed by
 * sha256rnds2 (presumably ABEF/CDGH order - confirm against the ISA
 * reference).  The mask is also copied to %xmm7 because %xmm3 is reused
 * as scratch inside the loop.
 */
pshufd $0x1b,%xmm1,%xmm0
pshufd $0xb1,%xmm1,%xmm1
pshufd $0x1b,%xmm2,%xmm2
movdqa %xmm3,%xmm7
.byte 102,15,58,15,202,8
punpcklqdq %xmm0,%xmm2
jmp .Loop_shaext
.align 16
.Loop_shaext:
/* Load 64 bytes of hash input and shuffle each 16-byte group via %xmm3. */
movdqu (%r10),%xmm10
movdqu 16(%r10),%xmm11
movdqu 32(%r10),%xmm12
.byte 102,68,15,56,0,211
movdqu 48(%r10),%xmm13
/* Rounds 0-3: constants are read at a 32-byte stride (rows are doubled). */
movdqa 0-128(%rax),%xmm0
paddd %xmm10,%xmm0
.byte 102,68,15,56,0,219
/* Save the incoming state for the feed-forward add at the end. */
movdqa %xmm2,%xmm9
movdqa %xmm1,%xmm8
/*
 * AES block 0: %xmm6 (previous ciphertext or IV) ^= input ^ round key 0,
 * then the round keys alternate between %xmm4 and %xmm5.
 */
movups 0(%rdi),%xmm14
xorps %xmm15,%xmm14
xorps %xmm14,%xmm6
movups -80(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,209
/* Move the upper two K+W dwords down for the second pair of rounds. */
pshufd $0x0e,%xmm0,%xmm0
movups -64(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,202
/* Rounds 4-7. */
movdqa 32-128(%rax),%xmm0
paddd %xmm11,%xmm0
.byte 102,68,15,56,0,227
leaq 64(%r10),%r10
movups -48(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movups -32(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,202
/* Rounds 8-11; message-schedule expansion (sha256msg1) starts here. */
movdqa 64-128(%rax),%xmm0
paddd %xmm12,%xmm0
.byte 102,68,15,56,0,235
.byte 69,15,56,204,211
movups -16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
/* %xmm3 is scratch from here on; the mask survives in %xmm7. */
movdqa %xmm13,%xmm3
.byte 102,65,15,58,15,220,4
paddd %xmm3,%xmm10
movups 0(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,202
/* Rounds 12-15. */
movdqa 96-128(%rax),%xmm0
paddd %xmm13,%xmm0
.byte 69,15,56,205,213
.byte 69,15,56,204,220
movups 16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movups 32(%rcx),%xmm4
aesenc %xmm5,%xmm6
movdqa %xmm10,%xmm3
.byte 102,65,15,58,15,221,4
paddd %xmm3,%xmm11
.byte 15,56,203,202
/* Rounds 16-19. */
movdqa 128-128(%rax),%xmm0
paddd %xmm10,%xmm0
.byte 69,15,56,205,218
.byte 69,15,56,204,229
movups 48(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm11,%xmm3
.byte 102,65,15,58,15,218,4
paddd %xmm3,%xmm12
/*
 * Key-length dispatch for AES block 0.  Below 11: finish with the key at
 * 48(%rcx).  Equal to 11: two more rounds.  Above 11: four more rounds.
 * The je reuses the flags from cmpl; movups and aesenc do not write
 * EFLAGS.
 */
cmpl $11,%r11d
jb .Laesenclast1
movups 64(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 80(%rcx),%xmm5
aesenc %xmm4,%xmm6
je .Laesenclast1
movups 96(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 112(%rcx),%xmm5
aesenc %xmm4,%xmm6
.Laesenclast1:
aesenclast %xmm5,%xmm6
/* Reload round key 1 (original key pointer + 16) for the next block. */
movups 16-112(%rcx),%xmm4
nop
.byte 15,56,203,202
/* AES block 1: store ciphertext 0, then chain it into the next input. */
movups 16(%rdi),%xmm14
xorps %xmm15,%xmm14
movups %xmm6,0(%rsi,%rdi,1)
xorps %xmm14,%xmm6
movups -80(%rcx),%xmm5
aesenc %xmm4,%xmm6
/* Rounds 20-23. */
movdqa 160-128(%rax),%xmm0
paddd %xmm11,%xmm0
.byte 69,15,56,205,227
.byte 69,15,56,204,234
movups -64(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm12,%xmm3
.byte 102,65,15,58,15,219,4
paddd %xmm3,%xmm13
movups -48(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
/* Rounds 24-27. */
movdqa 192-128(%rax),%xmm0
paddd %xmm12,%xmm0
.byte 69,15,56,205,236
.byte 69,15,56,204,211
movups -32(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm13,%xmm3
.byte 102,65,15,58,15,220,4
paddd %xmm3,%xmm10
movups -16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
/* Rounds 28-31. */
movdqa 224-128(%rax),%xmm0
paddd %xmm13,%xmm0
.byte 69,15,56,205,213
.byte 69,15,56,204,220
movups 0(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm10,%xmm3
.byte 102,65,15,58,15,221,4
paddd %xmm3,%xmm11
movups 16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
/* Rounds 32-35. */
movdqa 256-128(%rax),%xmm0
paddd %xmm10,%xmm0
.byte 69,15,56,205,218
.byte 69,15,56,204,229
movups 32(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm11,%xmm3
.byte 102,65,15,58,15,218,4
paddd %xmm3,%xmm12
movups 48(%rcx),%xmm5
aesenc %xmm4,%xmm6
/* Key-length dispatch for AES block 1 (same scheme as block 0). */
cmpl $11,%r11d
jb .Laesenclast2
movups 64(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 80(%rcx),%xmm5
aesenc %xmm4,%xmm6
je .Laesenclast2
movups 96(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 112(%rcx),%xmm5
aesenc %xmm4,%xmm6
.Laesenclast2:
aesenclast %xmm5,%xmm6
movups 16-112(%rcx),%xmm4
nop
.byte 15,56,203,202
/* AES block 2: store ciphertext 1, then chain it into the next input. */
movups 32(%rdi),%xmm14
xorps %xmm15,%xmm14
movups %xmm6,16(%rsi,%rdi,1)
xorps %xmm14,%xmm6
movups -80(%rcx),%xmm5
aesenc %xmm4,%xmm6
/* Rounds 36-39. */
movdqa 288-128(%rax),%xmm0
paddd %xmm11,%xmm0
.byte 69,15,56,205,227
.byte 69,15,56,204,234
movups -64(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm12,%xmm3
.byte 102,65,15,58,15,219,4
paddd %xmm3,%xmm13
movups -48(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
/* Rounds 40-43. */
movdqa 320-128(%rax),%xmm0
paddd %xmm12,%xmm0
.byte 69,15,56,205,236
.byte 69,15,56,204,211
movups -32(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm13,%xmm3
.byte 102,65,15,58,15,220,4
paddd %xmm3,%xmm10
movups -16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
/* Rounds 44-47. */
movdqa 352-128(%rax),%xmm0
paddd %xmm13,%xmm0
.byte 69,15,56,205,213
.byte 69,15,56,204,220
movups 0(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm10,%xmm3
.byte 102,65,15,58,15,221,4
paddd %xmm3,%xmm11
movups 16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
/* Rounds 48-51. */
movdqa 384-128(%rax),%xmm0
paddd %xmm10,%xmm0
.byte 69,15,56,205,218
.byte 69,15,56,204,229
movups 32(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm11,%xmm3
.byte 102,65,15,58,15,218,4
paddd %xmm3,%xmm12
movups 48(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
/* Rounds 52-55: K+W prepared here, consumed after the AES dispatch. */
movdqa 416-128(%rax),%xmm0
paddd %xmm11,%xmm0
.byte 69,15,56,205,227
.byte 69,15,56,204,234
/* Key-length dispatch for AES block 2 (same scheme as block 0). */
cmpl $11,%r11d
jb .Laesenclast3
movups 64(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 80(%rcx),%xmm5
aesenc %xmm4,%xmm6
je .Laesenclast3
movups 96(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 112(%rcx),%xmm5
aesenc %xmm4,%xmm6
.Laesenclast3:
aesenclast %xmm5,%xmm6
movups 16-112(%rcx),%xmm4
nop
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm12,%xmm3
.byte 102,65,15,58,15,219,4
paddd %xmm3,%xmm13
/* AES block 3: store ciphertext 2, then chain it into the next input. */
movups 48(%rdi),%xmm14
xorps %xmm15,%xmm14
movups %xmm6,32(%rsi,%rdi,1)
xorps %xmm14,%xmm6
movups -80(%rcx),%xmm5
aesenc %xmm4,%xmm6
movups -64(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,202
/* Rounds 56-59. */
movdqa 448-128(%rax),%xmm0
paddd %xmm12,%xmm0
.byte 69,15,56,205,236
/* Restore the pshufb mask into %xmm3 for the next pass. */
movdqa %xmm7,%xmm3
movups -48(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movups -32(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,202
/* Rounds 60-63. */
movdqa 480-128(%rax),%xmm0
paddd %xmm13,%xmm0
movups -16(%rcx),%xmm5
aesenc %xmm4,%xmm6
movups 0(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movups 16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
movups 32(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 48(%rcx),%xmm5
aesenc %xmm4,%xmm6
/* Key-length dispatch for AES block 3 (same scheme as block 0). */
cmpl $11,%r11d
jb .Laesenclast4
movups 64(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 80(%rcx),%xmm5
aesenc %xmm4,%xmm6
je .Laesenclast4
movups 96(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 112(%rcx),%xmm5
aesenc %xmm4,%xmm6
.Laesenclast4:
aesenclast %xmm5,%xmm6
movups 16-112(%rcx),%xmm4
nop
/* Feed-forward: add the state saved at the top of this pass. */
paddd %xmm9,%xmm2
paddd %xmm8,%xmm1
/*
 * decq sets ZF for the jnz below; the intervening movups and leaq do not
 * modify flags.
 */
decq %rdx
movups %xmm6,48(%rsi,%rdi,1)
leaq 64(%rdi),%rdi
jnz .Loop_shaext
/* Undo the prologue shuffle so the state is stored in its memory layout. */
pshufd $0xb1,%xmm2,%xmm2
pshufd $0x1b,%xmm1,%xmm3
pshufd $0xb1,%xmm1,%xmm1
punpckhqdq %xmm2,%xmm1
.byte 102,15,58,15,211,8
/* Write back the last ciphertext block as the new IV, then the hash state. */
movups %xmm6,(%r8)
movdqu %xmm1,(%r9)
movdqu %xmm2,16(%r9)
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_cbc_sha256_enc_shaext,.-aesni_cbc_sha256_enc_shaext