diff options
author | René van Dorst <opensource@vdorst.com> | 2018-09-20 15:38:33 +0200 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2018-09-21 16:05:22 +0200 |
commit | eb7942380a4cc4d772a8c9e57974918480868336 (patch) | |
tree | 9666e20a2c28c9caa72f31be6b3fc787c9515aad /src | |
parent | c49fb7ea293e66e1efb068999cca040fa21dec31 (diff) |
poly1305-mips32r2: remove all reorder directives
This requires some minimal rearranging to make it work, but for the most
part the assembler (`as`) does the right thing, provided we pass it an optimization flag.
Suggested-by: Paul Burton <paul.burton@mips.com>
Signed-off-by: René van Dorst <opensource@vdorst.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/crypto/Kbuild.include | 1 | ||||
-rw-r--r-- | src/crypto/zinc/poly1305/poly1305-mips.S | 54 |
2 files changed, 23 insertions, 32 deletions
diff --git a/src/crypto/Kbuild.include b/src/crypto/Kbuild.include index 9ea7452..6f1f8d2 100644 --- a/src/crypto/Kbuild.include +++ b/src/crypto/Kbuild.include @@ -34,6 +34,7 @@ endif ifeq ($(CONFIG_MIPS)$(CONFIG_CPU_MIPS32_R2),yy) wireguard-y += crypto/zinc/poly1305/poly1305-mips.o CFLAGS_poly1305.o += -DCONFIG_ZINC_ARCH_MIPS +AFLAGS_poly1305-mips.o += -O2 endif ifeq ($(CONFIG_MIPS)$(CONFIG_64BIT),yy) wireguard-y += crypto/zinc/poly1305/poly1305-mips64.o diff --git a/src/crypto/zinc/poly1305/poly1305-mips.S b/src/crypto/zinc/poly1305/poly1305-mips.S index 128b60c..4d695ee 100644 --- a/src/crypto/zinc/poly1305/poly1305-mips.S +++ b/src/crypto/zinc/poly1305/poly1305-mips.S @@ -52,23 +52,21 @@ #define PTR_POLY1305_H(n) (20 + (n*4)) ## ($a0) #define POLY1305_BLOCK_SIZE 16 -#define POLY1305_STACK_SIZE 8 * 4 +#define POLY1305_STACK_SIZE 32 -.set reorder -.set noat -.align 4 -.globl poly1305_blocks_mips -.ent poly1305_blocks_mips +.set noat +.align 4 +.globl poly1305_blocks_mips +.ent poly1305_blocks_mips poly1305_blocks_mips: - .frame $sp,POLY1305_STACK_SIZE,$31 + .frame $sp, POLY1305_STACK_SIZE, $ra /* srclen &= 0xFFFFFFF0 */ ins srclen, $zero, 0, 4 - .set noreorder + addiu $sp, -(POLY1305_STACK_SIZE) + /* check srclen >= 16 bytes */ beqz srclen, .Lpoly1305_blocks_mips_end - addiu $sp, -(POLY1305_STACK_SIZE) - .set reorder /* Calculate last round based on src address pointer. * last round src ptr (srclen) = src + (srclen & 0xFFFFFFF0) @@ -210,12 +208,10 @@ poly1305_blocks_mips: srl CA, O4, 2 ins O4, $zero, 0, 2 + addu CA, O4 + /* able to do a 16 byte block. */ - .set noreorder bne src, srclen, .Lpoly1305_loop - /* Delay slot is always executed. */ - addu CA, O4 - .set reorder /* restore the used save registers. 
*/ lw $s0, 0($sp) @@ -234,14 +230,12 @@ poly1305_blocks_mips: sw H4, PTR_POLY1305_H(4) .Lpoly1305_blocks_mips_end: + addiu $sp, POLY1305_STACK_SIZE + /* Jump Back */ - .set noreorder jr $ra - addiu $sp, POLY1305_STACK_SIZE - .set reorder .end poly1305_blocks_mips .set at -.set reorder /* Input arguments CTX=$a0, MAC=$a1, NONCE=$a2 */ #define MAC $a1 @@ -253,11 +247,10 @@ poly1305_blocks_mips: #define G3 $t8 #define G4 $t9 -.set reorder -.set noat -.align 4 -.globl poly1305_emit_mips -.ent poly1305_emit_mips +.set noat +.align 4 +.globl poly1305_emit_mips +.ent poly1305_emit_mips poly1305_emit_mips: /* load Hx and Carry */ lw CA, PTR_POLY1305_CA @@ -347,10 +340,9 @@ poly1305_emit_mips: swr H0, 0+LSB(MAC) swr H1, 4+LSB(MAC) swr H2, 8+LSB(MAC) - .set noreorder - jr $ra swr H3,12+LSB(MAC) - .set reorder + + jr $ra .end poly1305_emit_mips #define PR0 $t0 @@ -361,9 +353,9 @@ poly1305_emit_mips: /* Input arguments CTX=$a0, KEY=$a1 */ -.align 4 -.globl poly1305_init_mips -.ent poly1305_init_mips +.align 4 +.globl poly1305_init_mips +.ent poly1305_init_mips poly1305_init_mips: lwl PR0, 0+MSB($a1) lwl PR1, 4+MSB($a1) @@ -408,10 +400,8 @@ poly1305_init_mips: sw PR0, PTR_POLY1305_R(0) sw PR1, PTR_POLY1305_R(1) sw PR2, PTR_POLY1305_R(2) + sw PR3, PTR_POLY1305_R(3) - .set noreorder /* Jump Back */ jr $ra - sw PR3, PTR_POLY1305_R(3) - .set reorder .end poly1305_init_mips |