summary | refs | log | tree | commit | diff | homepage
path: root/src/crypto/zinc/chacha20/chacha20-arm-glue.h
diff options
context:
space:
mode:
author: Jason A. Donenfeld <Jason@zx2c4.com> 2018-09-20 16:31:01 +0200
committer: Jason A. Donenfeld <Jason@zx2c4.com> 2018-09-21 16:05:22 +0200
commit: f61fb1b86c28225353ee67802b512c8529d21fb0 (patch)
tree: 19ed9b2463f47fca15b706d82faf8080a5c28742 /src/crypto/zinc/chacha20/chacha20-arm-glue.h
parent: 08edd02db06f9dd424bc023703fed8ea8c42c97d (diff)
chacha20-arm: go with Ard's version to optimize for Cortex-A7
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'src/crypto/zinc/chacha20/chacha20-arm-glue.h')
-rw-r--r-- src/crypto/zinc/chacha20/chacha20-arm-glue.h | 52
1 files changed, 44 insertions, 8 deletions
diff --git a/src/crypto/zinc/chacha20/chacha20-arm-glue.h b/src/crypto/zinc/chacha20/chacha20-arm-glue.h
index 9ec2cd8..4a123c9 100644
--- a/src/crypto/zinc/chacha20/chacha20-arm-glue.h
+++ b/src/crypto/zinc/chacha20/chacha20-arm-glue.h
@@ -8,12 +8,17 @@
asmlinkage void chacha20_arm(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
-#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && \
- (defined(CONFIG_64BIT) || __LINUX_ARM_ARCH__ >= 7)
-#define ARM_USE_NEON
+#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
+#if defined(__LINUX_ARM_ARCH__) && __LINUX_ARM_ARCH__ == 7
+#define ARM_USE_NEONv7
+asmlinkage void chacha20_neon_1block(const u32 *state, u8 *dst, const u8 *src);
+asmlinkage void chacha20_neon_4block(const u32 *state, u8 *dst, const u8 *src);
+#elif defined(CONFIG_64BIT)
+#define ARM_USE_NEONv8
asmlinkage void chacha20_neon(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
#endif
+#endif
static bool chacha20_use_neon __ro_after_init;
@@ -26,17 +31,48 @@ static void __init chacha20_fpu_init(void)
#endif
}
-static inline bool chacha20_arch(u8 *dst, const u8 *src, const size_t len,
- const u32 key[8], const u32 counter[4],
+static inline bool chacha20_arch(struct chacha20_ctx *state, u8 *dst,
+ const u8 *src, size_t len,
simd_context_t *simd_context)
{
-#if defined(ARM_USE_NEON)
+#if defined(ARM_USE_NEONv7)
if (chacha20_use_neon && simd_use(simd_context)) {
- chacha20_neon(dst, src, len, key, counter);
+ u8 buf[CHACHA20_BLOCK_SIZE];
+
+ while (len >= CHACHA20_BLOCK_SIZE * 4) {
+ chacha20_neon_4block((u32 *)state, dst, src);
+ len -= CHACHA20_BLOCK_SIZE * 4;
+ src += CHACHA20_BLOCK_SIZE * 4;
+ dst += CHACHA20_BLOCK_SIZE * 4;
+ state->counter[0] += 4;
+ }
+ while (len >= CHACHA20_BLOCK_SIZE) {
+ chacha20_neon_1block((u32 *)state, dst, src);
+ len -= CHACHA20_BLOCK_SIZE;
+ src += CHACHA20_BLOCK_SIZE;
+ dst += CHACHA20_BLOCK_SIZE;
+ state->counter[0] += 1;
+ }
+ if (len) {
+ memcpy(buf, src, len);
+ chacha20_neon_1block((u32 *)state, buf, buf);
+ state->counter[0] += 1;
+ memcpy(dst, buf, len);
+ }
return true;
}
+#elif defined(ARM_USE_NEONv8)
+ if (chacha20_use_neon && simd_use(simd_context)) {
+ chacha20_neon(dst, src, len, state->key, state->counter);
+ goto success;
+ }
#endif
- chacha20_arm(dst, src, len, key, counter);
+
+ chacha20_arm(dst, src, len, state->key, state->counter);
+ goto success;
+
+success:
+ state->counter[0] += (len + 63) / 64;
return true;
}