summaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author: René van Dorst <opensource@vdorst.com> 2018-09-20 15:38:33 +0200
committer: Jason A. Donenfeld <Jason@zx2c4.com> 2018-09-21 16:05:22 +0200
commit: eb7942380a4cc4d772a8c9e57974918480868336 (patch)
tree: 9666e20a2c28c9caa72f31be6b3fc787c9515aad /src
parent: c49fb7ea293e66e1efb068999cca040fa21dec31 (diff)
poly1305-mips32r2: remove all reorder directives
This requires some minimal rearranging to make it work, but for the most part the assembler (`as`) does the right thing, provided we pass it an optimization flag (`-O2`, added to AFLAGS for this object).

Suggested-by: Paul Burton <paul.burton@mips.com>
Signed-off-by: René van Dorst <opensource@vdorst.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'src')
-rw-r--r--  src/crypto/Kbuild.include                 1
-rw-r--r--  src/crypto/zinc/poly1305/poly1305-mips.S  54
2 files changed, 23 insertions, 32 deletions
diff --git a/src/crypto/Kbuild.include b/src/crypto/Kbuild.include
index 9ea7452..6f1f8d2 100644
--- a/src/crypto/Kbuild.include
+++ b/src/crypto/Kbuild.include
@@ -34,6 +34,7 @@ endif
ifeq ($(CONFIG_MIPS)$(CONFIG_CPU_MIPS32_R2),yy)
wireguard-y += crypto/zinc/poly1305/poly1305-mips.o
CFLAGS_poly1305.o += -DCONFIG_ZINC_ARCH_MIPS
+AFLAGS_poly1305-mips.o += -O2
endif
ifeq ($(CONFIG_MIPS)$(CONFIG_64BIT),yy)
wireguard-y += crypto/zinc/poly1305/poly1305-mips64.o
diff --git a/src/crypto/zinc/poly1305/poly1305-mips.S b/src/crypto/zinc/poly1305/poly1305-mips.S
index 128b60c..4d695ee 100644
--- a/src/crypto/zinc/poly1305/poly1305-mips.S
+++ b/src/crypto/zinc/poly1305/poly1305-mips.S
@@ -52,23 +52,21 @@
#define PTR_POLY1305_H(n) (20 + (n*4)) ## ($a0)
#define POLY1305_BLOCK_SIZE 16
-#define POLY1305_STACK_SIZE 8 * 4
+#define POLY1305_STACK_SIZE 32
-.set reorder
-.set noat
-.align 4
-.globl poly1305_blocks_mips
-.ent poly1305_blocks_mips
+.set noat
+.align 4
+.globl poly1305_blocks_mips
+.ent poly1305_blocks_mips
poly1305_blocks_mips:
- .frame $sp,POLY1305_STACK_SIZE,$31
+ .frame $sp, POLY1305_STACK_SIZE, $ra
/* srclen &= 0xFFFFFFF0 */
ins srclen, $zero, 0, 4
- .set noreorder
+ addiu $sp, -(POLY1305_STACK_SIZE)
+
/* check srclen >= 16 bytes */
beqz srclen, .Lpoly1305_blocks_mips_end
- addiu $sp, -(POLY1305_STACK_SIZE)
- .set reorder
/* Calculate last round based on src address pointer.
* last round src ptr (srclen) = src + (srclen & 0xFFFFFFF0)
@@ -210,12 +208,10 @@ poly1305_blocks_mips:
srl CA, O4, 2
ins O4, $zero, 0, 2
+ addu CA, O4
+
/* able to do a 16 byte block. */
- .set noreorder
bne src, srclen, .Lpoly1305_loop
- /* Delay slot is always executed. */
- addu CA, O4
- .set reorder
/* restore the used save registers. */
lw $s0, 0($sp)
@@ -234,14 +230,12 @@ poly1305_blocks_mips:
sw H4, PTR_POLY1305_H(4)
.Lpoly1305_blocks_mips_end:
+ addiu $sp, POLY1305_STACK_SIZE
+
/* Jump Back */
- .set noreorder
jr $ra
- addiu $sp, POLY1305_STACK_SIZE
- .set reorder
.end poly1305_blocks_mips
.set at
-.set reorder
/* Input arguments CTX=$a0, MAC=$a1, NONCE=$a2 */
#define MAC $a1
@@ -253,11 +247,10 @@ poly1305_blocks_mips:
#define G3 $t8
#define G4 $t9
-.set reorder
-.set noat
-.align 4
-.globl poly1305_emit_mips
-.ent poly1305_emit_mips
+.set noat
+.align 4
+.globl poly1305_emit_mips
+.ent poly1305_emit_mips
poly1305_emit_mips:
/* load Hx and Carry */
lw CA, PTR_POLY1305_CA
@@ -347,10 +340,9 @@ poly1305_emit_mips:
swr H0, 0+LSB(MAC)
swr H1, 4+LSB(MAC)
swr H2, 8+LSB(MAC)
- .set noreorder
- jr $ra
swr H3,12+LSB(MAC)
- .set reorder
+
+ jr $ra
.end poly1305_emit_mips
#define PR0 $t0
@@ -361,9 +353,9 @@ poly1305_emit_mips:
/* Input arguments CTX=$a0, KEY=$a1 */
-.align 4
-.globl poly1305_init_mips
-.ent poly1305_init_mips
+.align 4
+.globl poly1305_init_mips
+.ent poly1305_init_mips
poly1305_init_mips:
lwl PR0, 0+MSB($a1)
lwl PR1, 4+MSB($a1)
@@ -408,10 +400,8 @@ poly1305_init_mips:
sw PR0, PTR_POLY1305_R(0)
sw PR1, PTR_POLY1305_R(1)
sw PR2, PTR_POLY1305_R(2)
+ sw PR3, PTR_POLY1305_R(3)
- .set noreorder
/* Jump Back */
jr $ra
- sw PR3, PTR_POLY1305_R(3)
- .set reorder
.end poly1305_init_mips