diff options
author | Jason A. Donenfeld <Jason@zx2c4.com> | 2018-09-20 16:31:01 +0200 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2018-09-21 16:05:22 +0200 |
commit | f61fb1b86c28225353ee67802b512c8529d21fb0 (patch) | |
tree | 19ed9b2463f47fca15b706d82faf8080a5c28742 /src/crypto/zinc/chacha20/chacha20-arm-glue.h | |
parent | 08edd02db06f9dd424bc023703fed8ea8c42c97d (diff) |
chacha20-arm: go with Ard's version to optimize for Cortex-A7
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'src/crypto/zinc/chacha20/chacha20-arm-glue.h')
-rw-r--r-- | src/crypto/zinc/chacha20/chacha20-arm-glue.h | 52 |
1 file changed, 44 insertions, 8 deletions
diff --git a/src/crypto/zinc/chacha20/chacha20-arm-glue.h b/src/crypto/zinc/chacha20/chacha20-arm-glue.h index 9ec2cd8..4a123c9 100644 --- a/src/crypto/zinc/chacha20/chacha20-arm-glue.h +++ b/src/crypto/zinc/chacha20/chacha20-arm-glue.h @@ -8,12 +8,17 @@ asmlinkage void chacha20_arm(u8 *out, const u8 *in, const size_t len, const u32 key[8], const u32 counter[4]); -#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && \ - (defined(CONFIG_64BIT) || __LINUX_ARM_ARCH__ >= 7) -#define ARM_USE_NEON +#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) +#if defined(__LINUX_ARM_ARCH__) && __LINUX_ARM_ARCH__ == 7 +#define ARM_USE_NEONv7 +asmlinkage void chacha20_neon_1block(const u32 *state, u8 *dst, const u8 *src); +asmlinkage void chacha20_neon_4block(const u32 *state, u8 *dst, const u8 *src); +#elif defined(CONFIG_64BIT) +#define ARM_USE_NEONv8 asmlinkage void chacha20_neon(u8 *out, const u8 *in, const size_t len, const u32 key[8], const u32 counter[4]); #endif +#endif static bool chacha20_use_neon __ro_after_init; @@ -26,17 +31,48 @@ static void __init chacha20_fpu_init(void) #endif } -static inline bool chacha20_arch(u8 *dst, const u8 *src, const size_t len, - const u32 key[8], const u32 counter[4], +static inline bool chacha20_arch(struct chacha20_ctx *state, u8 *dst, + const u8 *src, size_t len, simd_context_t *simd_context) { -#if defined(ARM_USE_NEON) +#if defined(ARM_USE_NEONv7) if (chacha20_use_neon && simd_use(simd_context)) { - chacha20_neon(dst, src, len, key, counter); + u8 buf[CHACHA20_BLOCK_SIZE]; + + while (len >= CHACHA20_BLOCK_SIZE * 4) { + chacha20_neon_4block((u32 *)state, dst, src); + len -= CHACHA20_BLOCK_SIZE * 4; + src += CHACHA20_BLOCK_SIZE * 4; + dst += CHACHA20_BLOCK_SIZE * 4; + state->counter[0] += 4; + } + while (len >= CHACHA20_BLOCK_SIZE) { + chacha20_neon_1block((u32 *)state, dst, src); + len -= CHACHA20_BLOCK_SIZE; + src += CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + state->counter[0] += 1; + } + if (len) { + memcpy(buf, src, len); + 
chacha20_neon_1block((u32 *)state, buf, buf); + state->counter[0] += 1; + memcpy(dst, buf, len); + } return true; } +#elif defined(ARM_USE_NEONv8) + if (chacha20_use_neon && simd_use(simd_context)) { + chacha20_neon(dst, src, len, state->key, state->counter); + goto success; + } #endif - chacha20_arm(dst, src, len, key, counter); + + chacha20_arm(dst, src, len, state->key, state->counter); + goto success; + +success: + state->counter[0] += (len + 63) / 64; return true; } |