summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- src/crypto/zinc/chacha20/chacha20-mips-glue.h 7
-rw-r--r-- src/crypto/zinc/chacha20/chacha20-mips.S 315
2 files changed, 143 insertions, 179 deletions
diff --git a/src/crypto/zinc/chacha20/chacha20-mips-glue.h b/src/crypto/zinc/chacha20/chacha20-mips-glue.h
index e38098e..929ca12 100644
--- a/src/crypto/zinc/chacha20/chacha20-mips-glue.h
+++ b/src/crypto/zinc/chacha20/chacha20-mips-glue.h
@@ -3,8 +3,8 @@
* Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
-asmlinkage void chacha20_mips(u8 *out, const u8 *in, const size_t len,
- const u32 key[8], const u32 counter[4]);
+asmlinkage void chacha20_mips(u32 state[16], u8 *out, const u8 *in,
+ const size_t len); /* implemented in chacha20-mips.S; the routine itself stores the advanced counter back into state[12] */
static void __init chacha20_fpu_init(void)
{
}
@@ -13,8 +13,7 @@ static inline bool chacha20_arch(struct chacha20_ctx *state, u8 *dst,
const u8 *src, const size_t len,
simd_context_t *simd_context)
{
- chacha20_mips(dst, src, len, state->key, state->counter);
- state->counter[0] += (len + 63) / 64;
+ chacha20_mips((u32 *)state, dst, src, len);
return true;
}
diff --git a/src/crypto/zinc/chacha20/chacha20-mips.S b/src/crypto/zinc/chacha20/chacha20-mips.S
index 2b82ebf..7e2b5e8 100644
--- a/src/crypto/zinc/chacha20/chacha20-mips.S
+++ b/src/crypto/zinc/chacha20/chacha20-mips.S
@@ -7,7 +7,7 @@
#define MASK_U32 0x3c
#define MASK_BYTES 0x03
#define CHACHA20_BLOCK_SIZE 64
-#define STACK_SIZE 64
+#define STACK_SIZE 40
#define X0 $t0
#define X1 $t1
@@ -17,8 +17,8 @@
#define X5 $t5
#define X6 $t6
#define X7 $t7
-#define X8 $v1
-#define X9 $fp
+#define X8 $t8
+#define X9 $t9
#define X10 $s7
#define X11 $s6
#define X12 $s5
@@ -32,13 +32,10 @@
#define X(n) X ## n
/* Input arguments */
-#define OUT $a0
-#define IN $a1
-#define BYTES $a2
-/* KEY and NONCE argument must be u32 aligned */
-#define KEY $a3
-/* NONCE pointer is given via stack, must be u32 aligned */
-#define NONCE $t9
+#define STATE $a0
+#define OUT $a1
+#define IN $a2
+#define BYTES $a3
/* Output argument */
/* NONCE[0] is kept in a register and not in memory.
@@ -54,16 +51,8 @@
#define SAVED_X X15
#define SAVED_CA $ra
-#define PTR_LAST_ROUND $t8
-
-/* ChaCha20 constants and stack location */
-#define CONSTANT_OFS_SP 48
-#define UNALIGNED_OFS_SP 40
-
-#define CONSTANT_1 0x61707865
-#define CONSTANT_2 0x3320646e
-#define CONSTANT_3 0x79622d32
-#define CONSTANT_4 0x6b206574
+#define PTR_LAST_ROUND $v1
+#define IS_UNALIGNED $fp
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
@@ -81,65 +70,121 @@
#define ROTR(n)
#endif
-#define STORE_UNALIGNED(x, a, s, o) \
-.Lchacha20_mips_xor_unaligned_ ## x ## _b: ; \
- .if ((s != NONCE) || (o != 0)); \
- lw T0, o(s); \
+#define FOR_EACH_WORD(x) /* expand x(n) once per state word index n, ascending 0..15; used to emit the jump tables */ \
+ x( 0); \
+ x( 1); \
+ x( 2); \
+ x( 3); \
+ x( 4); \
+ x( 5); \
+ x( 6); \
+ x( 7); \
+ x( 8); \
+ x( 9); \
+ x(10); \
+ x(11); \
+ x(12); \
+ x(13); \
+ x(14); \
+ x(15);
+
+#define FOR_EACH_WORD_REV(x) /* expand x(n) once per state word index n, descending 15..0; used to emit the store sequences */ \
+ x(15); \
+ x(14); \
+ x(13); \
+ x(12); \
+ x(11); \
+ x(10); \
+ x( 9); \
+ x( 8); \
+ x( 7); \
+ x( 6); \
+ x( 5); \
+ x( 4); \
+ x( 3); \
+ x( 2); \
+ x( 1); \
+ x( 0);
+
+#define PLUS_ONE_0 1 /* lookup table: PLUS_ONE_<n> is the literal n + 1, for n = 0..15 */
+#define PLUS_ONE_1 2
+#define PLUS_ONE_2 3
+#define PLUS_ONE_3 4
+#define PLUS_ONE_4 5
+#define PLUS_ONE_5 6
+#define PLUS_ONE_6 7
+#define PLUS_ONE_7 8
+#define PLUS_ONE_8 9
+#define PLUS_ONE_9 10
+#define PLUS_ONE_10 11
+#define PLUS_ONE_11 12
+#define PLUS_ONE_12 13
+#define PLUS_ONE_13 14
+#define PLUS_ONE_14 15
+#define PLUS_ONE_15 16
+#define PLUS_ONE(x) PLUS_ONE_ ## x /* selects the table entry for x, giving x + 1 as a single token usable in a label name */
+#define _CONCAT3(a,b,c) a ## b ## c /* pastes the three arguments exactly as given, without expanding them */
+#define CONCAT3(a,b,c) _CONCAT3(a,b,c) /* extra level so that a, b and c are macro-expanded before being pasted */
+
+#define STORE_UNALIGNED(x) /* finish word x: add the input state word to X<x>, XOR with the word at IN + 4*x, store to OUT + 4*x using lwl/lwr and swl/swr */ \
+CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) /* label number is x + 1, so _16_b is the full-block entry point */ \
+ .if (x != 12); \
+ lw T0, (x*4)(STATE); /* T0 = state word x; word 12 is taken from NONCE_0 instead of memory */ \
.endif; \
- lwl T1, x-4+MSB ## (IN); \
- lwr T1, x-4+LSB ## (IN); \
- .if ((s == NONCE) && (o == 0)); \
- addu X ## a, NONCE_0; \
+ lwl T1, (x*4)+MSB ## (IN); \
+ lwr T1, (x*4)+LSB ## (IN); \
+ .if (x == 12); \
+ addu X ## x, NONCE_0; \
.else; \
- addu X ## a, T0; \
+ addu X ## x, T0; \
.endif; \
- CPU_TO_LE32(X ## a); \
- xor X ## a, T1; \
- swl X ## a, x-4+MSB ## (OUT); \
- swr X ## a, x-4+LSB ## (OUT);
-
-#define STORE_ALIGNED(x, a, s, o) \
-.Lchacha20_mips_xor_aligned_ ## x ## _b: ; \
- .if ((s != NONCE) || (o != 0)); \
- lw T0, o(s); \
+ CPU_TO_LE32(X ## x); \
+ xor X ## x, T1; \
+ swl X ## x, (x*4)+MSB ## (OUT); \
+ swr X ## x, (x*4)+LSB ## (OUT);
+
+#define STORE_ALIGNED(x) /* finish word x: add the input state word to X<x>, XOR with the word at IN + 4*x, store to OUT + 4*x using plain lw/sw */ \
+CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) /* label number is x + 1, so _16_b is the full-block entry point */ \
+ .if (x != 12); \
+ lw T0, (x*4)(STATE); /* T0 = state word x; word 12 is taken from NONCE_0 instead of memory */ \
.endif; \
- lw T1, x-4 ## (IN); \
- .if ((s == NONCE) && (o == 0)); \
- addu X ## a, NONCE_0; \
+ lw T1, (x*4) ## (IN); \
+ .if (x == 12); \
+ addu X ## x, NONCE_0; \
.else; \
- addu X ## a, T0; \
+ addu X ## x, T0; \
.endif; \
- CPU_TO_LE32(X ## a); \
- xor X ## a, T1; \
- sw X ## a, x-4 ## (OUT);
+ CPU_TO_LE32(X ## x); \
+ xor X ## x, T1; \
+ sw X ## x, (x*4) ## (OUT);
/* Jump table macro.
* Used for setup and handling the last bytes, which are not multiple of 4.
* X15 is free to store Xn
* Every jumptable entry must be equal in size.
*/
-#define JMPTBL_ALIGNED(x, a, s, o) \
-.Lchacha20_mips_jmptbl_aligned_ ## a: ; \
+#define JMPTBL_ALIGNED(x) /* jump-table entry x: load SAVED_CA with the value to add for word x, copy X<x> to SAVED_X, branch to aligned store label x */ \
+.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
.set noreorder; \
- .if ((s == NONCE) && (o == 0)); \
+ .if (x == 12); \
move SAVED_CA, NONCE_0; \
.else; \
- lw SAVED_CA, o(s);\
+ lw SAVED_CA, (x*4)(STATE); /* SAVED_CA = state word x */ \
.endif; \
b .Lchacha20_mips_xor_aligned_ ## x ## _b; \
- move SAVED_X, X ## a; \
+ move SAVED_X, X ## x; /* placed directly after the branch while .set noreorder is active */ \
.set reorder
-#define JMPTBL_UNALIGNED(x, a, s, o) \
-.Lchacha20_mips_jmptbl_unaligned_ ## a: ; \
+#define JMPTBL_UNALIGNED(x) /* jump-table entry x: load SAVED_CA with the value to add for word x, copy X<x> to SAVED_X, branch to unaligned store label x */ \
+.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
.set noreorder; \
- .if ((s == NONCE) && (o == 0)); \
+ .if (x == 12); \
move SAVED_CA, NONCE_0; \
.else; \
- lw SAVED_CA, o(s);\
+ lw SAVED_CA, (x*4)(STATE);\
.endif; \
b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \
- move SAVED_X, X ## a; \
+ move SAVED_X, X ## x; /* placed directly after the branch while .set noreorder is active */ \
.set reorder
#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \
@@ -163,8 +208,6 @@
.ent chacha20_mips
chacha20_mips:
.frame $sp, STACK_SIZE, $ra
- /* This is in the fifth argument */
- lw NONCE, 16($sp)
addiu $sp, -STACK_SIZE
@@ -176,7 +219,7 @@ chacha20_mips:
ins PTR_LAST_ROUND, $zero, 0, 6
addu PTR_LAST_ROUND, OUT
- /* Save s0-s7, fp, ra. */
+ /* Save s0-s7, ra, fp */
sw $ra, 0($sp)
sw $fp, 4($sp)
sw $s0, 8($sp)
@@ -188,30 +231,13 @@ chacha20_mips:
sw $s6, 32($sp)
sw $s7, 36($sp)
- lw NONCE_0, 0(NONCE)
+ lw NONCE_0, 48(STATE)
+
/* Test IN or OUT is unaligned.
- * UNALIGNED (T1) = ( IN | OUT ) & 0x00000003
+ * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
*/
- or T1, IN, OUT
- andi T1, 0x3
-
- /* Load constant */
- lui X0, %hi(CONSTANT_1)
- lui X1, %hi(CONSTANT_2)
- lui X2, %hi(CONSTANT_3)
- lui X3, %hi(CONSTANT_4)
- ori X0, %lo(CONSTANT_1)
- ori X1, %lo(CONSTANT_2)
- ori X2, %lo(CONSTANT_3)
- ori X3, %lo(CONSTANT_4)
-
- /* Store constant on stack. */
- sw X0, 0+CONSTANT_OFS_SP($sp)
- sw X1, 4+CONSTANT_OFS_SP($sp)
- sw X2, 8+CONSTANT_OFS_SP($sp)
- sw X3, 12+CONSTANT_OFS_SP($sp)
-
- sw T1, UNALIGNED_OFS_SP($sp)
+ or IS_UNALIGNED, IN, OUT
+ andi IS_UNALIGNED, 0x3
andi BYTES, (CHACHA20_BLOCK_SIZE-1)
@@ -223,26 +249,25 @@ chacha20_mips:
addiu OUT, CHACHA20_BLOCK_SIZE
addiu NONCE_0, 1
- lw X0, 0+CONSTANT_OFS_SP($sp)
- lw X1, 4+CONSTANT_OFS_SP($sp)
- lw X2, 8+CONSTANT_OFS_SP($sp)
- lw X3, 12+CONSTANT_OFS_SP($sp)
- lw T1, UNALIGNED_OFS_SP($sp)
-
.Lchacha20_rounds_start:
- lw X4, 0(KEY)
- lw X5, 4(KEY)
- lw X6, 8(KEY)
- lw X7, 12(KEY)
- lw X8, 16(KEY)
- lw X9, 20(KEY)
- lw X10, 24(KEY)
- lw X11, 28(KEY)
+ lw X0, 0(STATE)
+ lw X1, 4(STATE)
+ lw X2, 8(STATE)
+ lw X3, 12(STATE)
+
+ lw X4, 16(STATE)
+ lw X5, 20(STATE)
+ lw X6, 24(STATE)
+ lw X7, 28(STATE)
+ lw X8, 32(STATE)
+ lw X9, 36(STATE)
+ lw X10, 40(STATE)
+ lw X11, 44(STATE)
move X12, NONCE_0
- lw X13, 4(NONCE)
- lw X14, 8(NONCE)
- lw X15, 12(NONCE)
+ lw X13, 52(STATE)
+ lw X14, 56(STATE)
+ lw X15, 60(STATE)
li $at, 20
.Loop_chacha20_xor_rounds:
@@ -259,20 +284,20 @@ chacha20_mips:
andi $at, BYTES, MASK_U32
- /* Unaligned? Jump */
- bnez T1, .Loop_chacha20_unaligned
+ /* Is data src/dst unaligned? Jump */
+ bnez IS_UNALIGNED, .Loop_chacha20_unaligned
/* Load upper half of jump table addr */
lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)
- /* Last round? No jump */
- bne OUT, PTR_LAST_ROUND, .Lchacha20_mips_xor_aligned_64_b
+ /* Last round? No, do a full block. */
+ bne OUT, PTR_LAST_ROUND, .Lchacha20_mips_xor_aligned_16_b
/* Calculate lower half jump table addr and offset */
ins T0, $at, 2, 6
/* Full block? Jump */
- beqz BYTES, .Lchacha20_mips_xor_aligned_64_b
+ beqz BYTES, .Lchacha20_mips_xor_aligned_16_b
subu T0, $at
addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)
@@ -283,13 +308,13 @@ chacha20_mips:
lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)
/* Last round? no jump */
- bne OUT, PTR_LAST_ROUND, .Lchacha20_mips_xor_unaligned_64_b
+ bne OUT, PTR_LAST_ROUND, .Lchacha20_mips_xor_unaligned_16_b
/* Calculate lower half jump table addr and offset */
ins T0, $at, 2, 6
/* Full block? Jump */
- beqz BYTES, .Lchacha20_mips_xor_unaligned_64_b
+ beqz BYTES, .Lchacha20_mips_xor_unaligned_16_b
subu T0, $at
addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)
@@ -298,22 +323,7 @@ chacha20_mips:
/* Aligned code path
*/
.align 4
- STORE_ALIGNED(64, 15, NONCE,12)
- STORE_ALIGNED(60, 14, NONCE, 8)
- STORE_ALIGNED(56, 13, NONCE, 4)
- STORE_ALIGNED(52, 12, NONCE, 0)
- STORE_ALIGNED(48, 11, KEY, 28)
- STORE_ALIGNED(44, 10, KEY, 24)
- STORE_ALIGNED(40, 9, KEY, 20)
- STORE_ALIGNED(36, 8, KEY, 16)
- STORE_ALIGNED(32, 7, KEY, 12)
- STORE_ALIGNED(28, 6, KEY, 8)
- STORE_ALIGNED(24, 5, KEY, 4)
- STORE_ALIGNED(20, 4, KEY, 0)
- STORE_ALIGNED(16, 3, $sp, 12+CONSTANT_OFS_SP)
- STORE_ALIGNED(12, 2, $sp, 8+CONSTANT_OFS_SP)
- STORE_ALIGNED( 8, 1, $sp, 4+CONSTANT_OFS_SP)
- STORE_ALIGNED( 4, 0, $sp, 0+CONSTANT_OFS_SP)
+ FOR_EACH_WORD_REV(STORE_ALIGNED)
bne OUT, PTR_LAST_ROUND, .Loop_chacha20_rounds
@@ -322,6 +332,9 @@ chacha20_mips:
bne $at, BYTES, .Lchacha20_mips_xor_bytes
.Lchacha20_mips_xor_done:
+ /* Write NONCE_0 back to its slot in the state (word 12, byte offset 48) */
+ sw NONCE_0, 48(STATE)
+
/* Restore used registers */
lw $ra, 0($sp)
lw $fp, 4($sp)
@@ -337,43 +350,11 @@ chacha20_mips:
addiu $sp, STACK_SIZE
jr $ra
- /* Start jump table */
- JMPTBL_ALIGNED( 0, 0, $sp, 0+CONSTANT_OFS_SP)
- JMPTBL_ALIGNED( 4, 1, $sp, 4+CONSTANT_OFS_SP)
- JMPTBL_ALIGNED( 8, 2, $sp, 8+CONSTANT_OFS_SP)
- JMPTBL_ALIGNED(12, 3, $sp, 12+CONSTANT_OFS_SP)
- JMPTBL_ALIGNED(16, 4, KEY, 0)
- JMPTBL_ALIGNED(20, 5, KEY, 4)
- JMPTBL_ALIGNED(24, 6, KEY, 8)
- JMPTBL_ALIGNED(28, 7, KEY, 12)
- JMPTBL_ALIGNED(32, 8, KEY, 16)
- JMPTBL_ALIGNED(36, 9, KEY, 20)
- JMPTBL_ALIGNED(40, 10, KEY, 24)
- JMPTBL_ALIGNED(44, 11, KEY, 28)
- JMPTBL_ALIGNED(48, 12, NONCE, 0)
- JMPTBL_ALIGNED(52, 13, NONCE, 4)
- JMPTBL_ALIGNED(56, 14, NONCE, 8)
- JMPTBL_ALIGNED(60, 15, NONCE,12)
- /* End jump table */
-
-/* Unaligned code path
- */
- STORE_UNALIGNED(64, 15, NONCE,12)
- STORE_UNALIGNED(60, 14, NONCE, 8)
- STORE_UNALIGNED(56, 13, NONCE, 4)
- STORE_UNALIGNED(52, 12, NONCE, 0)
- STORE_UNALIGNED(48, 11, KEY, 28)
- STORE_UNALIGNED(44, 10, KEY, 24)
- STORE_UNALIGNED(40, 9, KEY, 20)
- STORE_UNALIGNED(36, 8, KEY, 16)
- STORE_UNALIGNED(32, 7, KEY, 12)
- STORE_UNALIGNED(28, 6, KEY, 8)
- STORE_UNALIGNED(24, 5, KEY, 4)
- STORE_UNALIGNED(20, 4, KEY, 0)
- STORE_UNALIGNED(16, 3, $sp, 12+CONSTANT_OFS_SP)
- STORE_UNALIGNED(12, 2, $sp, 8+CONSTANT_OFS_SP)
- STORE_UNALIGNED( 8, 1, $sp, 4+CONSTANT_OFS_SP)
- STORE_UNALIGNED( 4, 0, $sp, 0+CONSTANT_OFS_SP)
+ /* Jump table */
+ FOR_EACH_WORD(JMPTBL_ALIGNED)
+
+ /* Unaligned code path */
+ FOR_EACH_WORD_REV(STORE_UNALIGNED)
bne OUT, PTR_LAST_ROUND, .Loop_chacha20_rounds
@@ -413,23 +394,7 @@ chacha20_mips:
b .Lchacha20_mips_xor_done
.Lchacha20_mips_jmptbl_unaligned:
- /* Start jump table */
- JMPTBL_UNALIGNED( 0, 0, $sp, 0+CONSTANT_OFS_SP)
- JMPTBL_UNALIGNED( 4, 1, $sp, 4+CONSTANT_OFS_SP)
- JMPTBL_UNALIGNED( 8, 2, $sp, 8+CONSTANT_OFS_SP)
- JMPTBL_UNALIGNED(12, 3, $sp, 12+CONSTANT_OFS_SP)
- JMPTBL_UNALIGNED(16, 4, KEY, 0)
- JMPTBL_UNALIGNED(20, 5, KEY, 4)
- JMPTBL_UNALIGNED(24, 6, KEY, 8)
- JMPTBL_UNALIGNED(28, 7, KEY, 12)
- JMPTBL_UNALIGNED(32, 8, KEY, 16)
- JMPTBL_UNALIGNED(36, 9, KEY, 20)
- JMPTBL_UNALIGNED(40, 10, KEY, 24)
- JMPTBL_UNALIGNED(44, 11, KEY, 28)
- JMPTBL_UNALIGNED(48, 12, NONCE, 0)
- JMPTBL_UNALIGNED(52, 13, NONCE, 4)
- JMPTBL_UNALIGNED(56, 14, NONCE, 8)
- JMPTBL_UNALIGNED(60, 15, NONCE,12)
- /* End jump table */
+ /* Jump table */
+ FOR_EACH_WORD(JMPTBL_UNALIGNED)
.end chacha20_mips
.set at