-rw-r--r--  src/crypto/chacha20-avx2-x86_64.S  |  4
-rw-r--r--  src/crypto/chacha20-ssse3-x86_64.S |  4
-rw-r--r--  src/crypto/curve25519-avx-x86_64.S | 52
3 files changed, 29 insertions(+), 31 deletions(-)
diff --git a/src/crypto/chacha20-avx2-x86_64.S b/src/crypto/chacha20-avx2-x86_64.S
index 8aabae0..48d6cc4 100644
--- a/src/crypto/chacha20-avx2-x86_64.S
+++ b/src/crypto/chacha20-avx2-x86_64.S
@@ -43,7 +43,7 @@ ENTRY(chacha20_asm_8block_xor_avx2)
 	vzeroupper

 	# 4 * 32 byte stack, 32-byte aligned
-	mov		%rsp, %r8
+	lea		8(%rsp),%r10
 	and		$~31, %rsp
 	sub		$0x80, %rsp

@@ -441,6 +441,6 @@ ENTRY(chacha20_asm_8block_xor_avx2)
 	vmovdqu		%ymm15,0x01e0(%rsi)

 	vzeroupper
-	mov		%r8,%rsp
+	lea		-8(%r10),%rsp
 	ret
 ENDPROC(chacha20_asm_8block_xor_avx2)
diff --git a/src/crypto/chacha20-ssse3-x86_64.S b/src/crypto/chacha20-ssse3-x86_64.S
index f1fe3c9..483f79a 100644
--- a/src/crypto/chacha20-ssse3-x86_64.S
+++ b/src/crypto/chacha20-ssse3-x86_64.S
@@ -164,7 +164,7 @@ ENTRY(chacha20_asm_4block_xor_ssse3)
 	# done with the slightly better performing SSSE3 byte shuffling,
 	# 7/12-bit word rotation uses traditional shift+OR.

-	mov		%rsp,%r11
+	lea		8(%rsp),%r10
 	sub		$0x80,%rsp
 	and		$~63,%rsp

@@ -629,7 +629,7 @@ ENTRY(chacha20_asm_4block_xor_ssse3)
 	pxor		%xmm1,%xmm15
 	movdqu		%xmm15,0xf0(%rsi)

-	mov		%r11,%rsp
+	lea		-8(%r10),%rsp
 	ret
 ENDPROC(chacha20_asm_4block_xor_ssse3)
diff --git a/src/crypto/curve25519-avx-x86_64.S b/src/crypto/curve25519-avx-x86_64.S
index bd636b5..b4851e5 100644
--- a/src/crypto/curve25519-avx-x86_64.S
+++ b/src/crypto/curve25519-avx-x86_64.S
@@ -25,10 +25,10 @@ curve25519_sandy2x_REDMASK51: .quad 0x0007FFFFFFFFFFFF
 .text
 .align 32
 ENTRY(curve25519_sandy2x_fe51_mul)
-mov %rsp,%r11
-and $31,%r11
-add $96,%r11
-sub %r11,%rsp
+push %rbp
+mov %rsp,%rbp
+sub $96,%rsp
+and $-32,%rsp
 movq %r11,0(%rsp)
 movq %r12,8(%rsp)
 movq %r13,16(%rsp)
@@ -195,18 +195,16 @@ ENTRY(curve25519_sandy2x_fe51_mul)
 movq 32(%rsp),%r15
 movq 40(%rsp),%rbx
 movq 48(%rsp),%rbp
-add %r11,%rsp
-mov %rdi,%rax
-mov %rsi,%rdx
+leave
 ret
 ENDPROC(curve25519_sandy2x_fe51_mul)

 .align 32
 ENTRY(curve25519_sandy2x_fe51_nsquare)
-mov %rsp,%r11
-and $31,%r11
-add $64,%r11
-sub %r11,%rsp
+push %rbp
+mov %rsp,%rbp
+sub $64,%rsp
+and $-32,%rsp
 movq %r11,0(%rsp)
 movq %r12,8(%rsp)
 movq %r13,16(%rsp)
@@ -348,16 +346,16 @@ ENTRY(curve25519_sandy2x_fe51_nsquare)
 movq 32(%rsp),%r15
 movq 40(%rsp),%rbx
 movq 48(%rsp),%rbp
-add %r11,%rsp
+leave
 ret
 ENDPROC(curve25519_sandy2x_fe51_nsquare)

 .align 32
 ENTRY(curve25519_sandy2x_fe51_pack)
-mov %rsp,%r11
-and $31,%r11
-add $32,%r11
-sub %r11,%rsp
+push %rbp
+mov %rsp,%rbp
+sub $32,%rsp
+and $-32,%rsp
 movq %r11,0(%rsp)
 movq %r12,8(%rsp)
 movq 0(%rsi),%rdx
@@ -553,16 +551,16 @@ ENTRY(curve25519_sandy2x_fe51_pack)
 movb %sil,31(%rdi)
 movq 0(%rsp),%r11
 movq 8(%rsp),%r12
-add %r11,%rsp
+leave
 ret
 ENDPROC(curve25519_sandy2x_fe51_pack)

 .align 32
 ENTRY(curve25519_sandy2x_ladder)
-mov %rsp,%r11
-and $31,%r11
-add $1856,%r11
-sub %r11,%rsp
+push %rbp
+mov %rsp,%rbp
+sub $1856,%rsp
+and $-32,%rsp
 movq %r11,1824(%rsp)
 movq %r12,1832(%rsp)
 movq %r13,1840(%rsp)
@@ -1976,16 +1974,16 @@ ENTRY(curve25519_sandy2x_ladder)
 movq 1832(%rsp),%r12
 movq 1840(%rsp),%r13
 movq 1848(%rsp),%r14
-add %r11,%rsp
+leave
 ret
 ENDPROC(curve25519_sandy2x_ladder)

 .align 32
 ENTRY(curve25519_sandy2x_ladder_base)
-mov %rsp,%r11
-and $31,%r11
-add $1568,%r11
-sub %r11,%rsp
+push %rbp
+mov %rsp,%rbp
+sub $1568,%rsp
+and $-32,%rsp
 movq %r11,1536(%rsp)
 movq %r12,1544(%rsp)
 movq %r13,1552(%rsp)
@@ -3254,6 +3252,6 @@ ENTRY(curve25519_sandy2x_ladder_base)
 movq 1536(%rsp),%r11
 movq 1544(%rsp),%r12
 movq 1552(%rsp),%r13
-add %r11,%rsp
+leave
 ret
 ENDPROC(curve25519_sandy2x_ladder_base)
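
All three files get the same kind of change: the prologue that used to stash the incoming %rsp (or the alignment adjustment) in a scratch register is replaced with a conventional realigned-stack idiom. In the two ChaCha20 routines, lea 8(%rsp),%r10 records the stack pointer as it was before the call pushed the return address, and lea -8(%r10),%rsp restores it. In the five Curve25519 routines, the old "and $31 / add / sub %r11,%rsp" sequence becomes an ordinary %rbp frame, so a single leave undoes both the sub and the and (fe51_mul also drops two leftover mov instructions before its ret). Below is a minimal, purely illustrative sketch of that %rbp-based pattern; the symbol aligned_frame_demo and the 96-byte frame size are hypothetical and not part of the patch.

.text
.globl aligned_frame_demo
aligned_frame_demo:
	push	%rbp			# save the caller's frame pointer
	mov	%rsp,%rbp		# %rbp records the pre-adjustment stack pointer
	sub	$96,%rsp		# reserve at least 96 bytes of locals
	and	$-32,%rsp		# round %rsp down to a 32-byte boundary
	vmovdqa	%ymm0,0(%rsp)		# the 96 bytes at 0(%rsp) are now 32-byte aligned,
	vmovdqa	0(%rsp),%ymm0		# so aligned vector loads and stores are safe
	leave				# %rsp = %rbp, pop %rbp: undoes the sub and the and
	ret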