diff options
Diffstat (limited to 'src/crypto')
-rw-r--r-- | src/crypto/chacha20.c | 8 | ||||
-rw-r--r-- | src/crypto/chacha20.h | 5 | ||||
-rw-r--r-- | src/crypto/chacha20poly1305.c | 121 | ||||
-rw-r--r-- | src/crypto/chacha20poly1305.h | 5 | ||||
-rw-r--r-- | src/crypto/poly1305.c | 35 | ||||
-rw-r--r-- | src/crypto/poly1305.h | 7 | ||||
-rw-r--r-- | src/crypto/simd.h | 23 |
7 files changed, 106 insertions, 98 deletions
diff --git a/src/crypto/chacha20.c b/src/crypto/chacha20.c index a65ec8b..29f7e6b 100644 --- a/src/crypto/chacha20.c +++ b/src/crypto/chacha20.c @@ -151,9 +151,9 @@ static void chacha20_generic(u8 *out, const u8 *in, u32 len, const u32 key[8], c } } -void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, bool have_simd) +void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, simd_context_t simd_context) { - if (!have_simd + if (simd_context != HAVE_FULL_SIMD #if defined(CONFIG_X86_64) || !chacha20_use_ssse3 @@ -227,10 +227,10 @@ static void hchacha20_generic(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[ out[7] = cpu_to_le32(x[15]); } -void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[HCHACHA20_NONCE_SIZE], const u8 key[HCHACHA20_KEY_SIZE], bool have_simd) +void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[HCHACHA20_NONCE_SIZE], const u8 key[HCHACHA20_KEY_SIZE], simd_context_t simd_context) { #if defined(CONFIG_X86_64) && defined(CONFIG_AS_SSSE3) - if (have_simd && chacha20_use_ssse3) { + if (simd_context == HAVE_FULL_SIMD && chacha20_use_ssse3) { hchacha20_ssse3(derived_key, nonce, key); return; } diff --git a/src/crypto/chacha20.h b/src/crypto/chacha20.h index 01fb99a..86ea4e3 100644 --- a/src/crypto/chacha20.h +++ b/src/crypto/chacha20.h @@ -6,6 +6,7 @@ #ifndef _WG_CHACHA20_H #define _WG_CHACHA20_H +#include "simd.h" #include <linux/kernel.h> #include <linux/types.h> @@ -39,8 +40,8 @@ static inline void chacha20_init(struct chacha20_ctx *state, const u8 key[CHACHA state->counter[2] = nonce & U32_MAX; state->counter[3] = nonce >> 32; } -void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, bool have_simd); +void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, simd_context_t simd_context); -void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[HCHACHA20_NONCE_SIZE], const u8 key[HCHACHA20_KEY_SIZE], bool have_simd); +void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[HCHACHA20_NONCE_SIZE], const u8 key[HCHACHA20_KEY_SIZE], simd_context_t simd_context); #endif /* _WG_CHACHA20_H */ diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c index b47f6a5..30d5444 100644 --- a/src/crypto/chacha20poly1305.c +++ b/src/crypto/chacha20poly1305.c @@ -6,7 +6,6 @@ #include "chacha20poly1305.h" #include "chacha20.h" #include "poly1305.h" -#include "simd.h" #include <linux/kernel.h> #include <crypto/scatterwalk.h> @@ -29,7 +28,7 @@ static struct blkcipher_desc chacha20_desc = { static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - bool have_simd) + simd_context_t simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; @@ -39,22 +38,22 @@ static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size } b = {{ 0 }}; chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd); - poly1305_init(&poly1305_state, b.block0, have_simd); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + poly1305_init(&poly1305_state, b.block0, simd_context); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); - chacha20(&chacha20_state, dst, src, src_len, have_simd); + chacha20(&chacha20_state, dst, src, src_len, simd_context); - poly1305_update(&poly1305_state, dst, src_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, dst, src_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(src_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); - poly1305_finish(&poly1305_state, dst + src_len, have_simd); + poly1305_finish(&poly1305_state, dst + src_len, simd_context); memzero_explicit(&chacha20_state, sizeof(chacha20_state)); memzero_explicit(&b, sizeof(b)); @@ -64,17 +63,17 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]) { - bool have_simd; + simd_context_t simd_context; - have_simd = simd_get(); - __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd); - simd_put(have_simd); + simd_context = simd_get(); + __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, simd_context); + simd_put(simd_context); } bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - bool have_simd) + simd_context_t simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; @@ -87,11 +86,11 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *sr } b = {{ 0 }}; chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd); - poly1305_init(&poly1305_state, b.block0, have_simd); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + poly1305_init(&poly1305_state, b.block0, simd_context); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); if (likely(src_len)) { blkcipher_walk_init(&walk, dst, src, src_len); @@ -99,26 +98,26 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *sr while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); - chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd); - poly1305_update(&poly1305_state, walk.dst.virt.addr, chunk_len, have_simd); + chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, simd_context); + poly1305_update(&poly1305_state, walk.dst.virt.addr, chunk_len, simd_context); ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE); } if (walk.nbytes) { - chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd); - poly1305_update(&poly1305_state, walk.dst.virt.addr, walk.nbytes, have_simd); + chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, simd_context); + poly1305_update(&poly1305_state, walk.dst.virt.addr, walk.nbytes, simd_context); ret = blkcipher_walk_done(&chacha20_desc, &walk, 0); } } if (unlikely(ret)) goto err; - poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(src_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); - poly1305_finish(&poly1305_state, b.mac, have_simd); + poly1305_finish(&poly1305_state, b.mac, simd_context); scatterwalk_map_and_copy(b.mac, dst, src_len, sizeof(b.mac), 1); err: memzero_explicit(&chacha20_state, sizeof(chacha20_state)); @@ -129,7 +128,7 @@ err: static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - bool have_simd) + simd_context_t simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; @@ -145,25 +144,25 @@ static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size return false; chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd); - poly1305_init(&poly1305_state, b.block0, have_simd); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + poly1305_init(&poly1305_state, b.block0, simd_context); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); dst_len = src_len - POLY1305_MAC_SIZE; - poly1305_update(&poly1305_state, src, dst_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, src, dst_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(dst_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); - poly1305_finish(&poly1305_state, b.mac, have_simd); + poly1305_finish(&poly1305_state, b.mac, simd_context); ret = crypto_memneq(b.mac, src + dst_len, POLY1305_MAC_SIZE); if (likely(!ret)) - chacha20(&chacha20_state, dst, src, dst_len, have_simd); + chacha20(&chacha20_state, dst, src, dst_len, simd_context); memzero_explicit(&chacha20_state, sizeof(chacha20_state)); memzero_explicit(&b, sizeof(b)); @@ -175,18 +174,18 @@ bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]) { - bool have_simd, ret; + simd_context_t simd_context, ret; - have_simd = simd_get(); - ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd); - simd_put(have_simd); + simd_context = simd_get(); + ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, simd_context); + simd_put(simd_context); return ret; } bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - bool have_simd) + simd_context_t simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; @@ -206,11 +205,11 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr return false; chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd); - poly1305_init(&poly1305_state, b.block0, have_simd); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + poly1305_init(&poly1305_state, b.block0, simd_context); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); dst_len = src_len - POLY1305_MAC_SIZE; if (likely(dst_len)) { @@ -219,26 +218,26 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); - poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len, have_simd); - chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd); + poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len, simd_context); + chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, simd_context); ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE); } if (walk.nbytes) { - poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes, have_simd); - chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd); + poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes, simd_context); + chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, simd_context); ret = blkcipher_walk_done(&chacha20_desc, &walk, 0); } } if (unlikely(ret)) goto err; - poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(dst_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); - poly1305_finish(&poly1305_state, b.computed_mac, have_simd); + poly1305_finish(&poly1305_state, b.computed_mac, simd_context); scatterwalk_map_and_copy(b.read_mac, src, dst_len, POLY1305_MAC_SIZE, 0); ret = crypto_memneq(b.read_mac, b.computed_mac, POLY1305_MAC_SIZE); @@ -253,13 +252,13 @@ void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 nonce[XCHACHA20POLY1305_NONCELEN], const u8 key[CHACHA20POLY1305_KEYLEN]) { - bool have_simd = simd_get(); + simd_context_t simd_context = simd_get(); u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16); - hchacha20(derived_key, nonce, key, have_simd); - __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, have_simd); + hchacha20(derived_key, nonce, key, simd_context); + __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, simd_context); memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN); - simd_put(have_simd); + simd_put(simd_context); } bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, @@ -267,13 +266,13 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 nonce[XCHACHA20POLY1305_NONCELEN], const u8 key[CHACHA20POLY1305_KEYLEN]) { - bool ret, have_simd = simd_get(); + bool ret, simd_context = simd_get(); u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16); - hchacha20(derived_key, nonce, key, have_simd); - ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, have_simd); + hchacha20(derived_key, nonce, key, simd_context); + ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, simd_context); memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN); - simd_put(have_simd); + simd_put(simd_context); return ret; } diff --git a/src/crypto/chacha20poly1305.h b/src/crypto/chacha20poly1305.h index 43b0a17..eb846c6 100644 --- a/src/crypto/chacha20poly1305.h +++ b/src/crypto/chacha20poly1305.h @@ -6,6 +6,7 @@ #ifndef _WG_CHACHA20POLY1305_H #define _WG_CHACHA20POLY1305_H +#include "simd.h" #include <linux/types.h> struct scatterlist; @@ -23,7 +24,7 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, bool __must_check chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - bool have_simd); + simd_context_t simd_context); bool __must_check chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, @@ -32,7 +33,7 @@ bool __must_check chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t bool __must_check chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - bool have_simd); + simd_context_t simd_context); void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, diff --git a/src/crypto/poly1305.c b/src/crypto/poly1305.c index 65a37d9..be2eb33 100644 --- a/src/crypto/poly1305.c +++ b/src/crypto/poly1305.c @@ -5,6 +5,7 @@ */ #include "poly1305.h" +#include "simd.h" #include <linux/kernel.h> #include <linux/string.h> @@ -237,7 +238,7 @@ static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4]) } #endif -void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], bool have_simd) +void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], simd_context_t simd_context) { ctx->nonce[0] = le32_to_cpup((__le32 *)&key[16]); ctx->nonce[1] = le32_to_cpup((__le32 *)&key[20]); @@ -256,28 +257,28 @@ void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], bo ctx->num = 0; } -static inline void poly1305_blocks(void *ctx, const u8 *inp, const size_t len, const u32 padbit, bool have_simd) +static inline void poly1305_blocks(void *ctx, const u8 *inp, const size_t len, const u32 padbit, simd_context_t simd_context) { #if defined(CONFIG_X86_64) #ifdef CONFIG_AS_AVX512 - if (poly1305_use_avx512 && have_simd) + if (poly1305_use_avx512 && simd_context == HAVE_FULL_SIMD) poly1305_blocks_avx512(ctx, inp, len, padbit); else #endif #ifdef CONFIG_AS_AVX2 - if (poly1305_use_avx2 && have_simd) + if (poly1305_use_avx2 && simd_context == HAVE_FULL_SIMD) poly1305_blocks_avx2(ctx, inp, len, padbit); else #endif #ifdef CONFIG_AS_AVX - if (poly1305_use_avx && have_simd) + if (poly1305_use_avx && simd_context == HAVE_FULL_SIMD) poly1305_blocks_avx(ctx, inp, len, padbit); else #endif poly1305_blocks_x86_64(ctx, inp, len, padbit); #elif defined(CONFIG_ARM) || defined(CONFIG_ARM64) #if defined(ARM_USE_NEON) - if (poly1305_use_neon && have_simd) + if (poly1305_use_neon && simd_context == HAVE_FULL_SIMD) poly1305_blocks_neon(ctx, inp, len, padbit); else #endif @@ -289,28 +290,28 @@ static inline void poly1305_blocks(void *ctx, const u8 *inp, const size_t len, c #endif } -static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE], const u32 nonce[4], bool have_simd) +static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE], const u32 nonce[4], simd_context_t simd_context) { #if defined(CONFIG_X86_64) #ifdef CONFIG_AS_AVX512 - if (poly1305_use_avx512 && have_simd) + if (poly1305_use_avx512 && simd_context == HAVE_FULL_SIMD) poly1305_emit_avx(ctx, mac, nonce); else #endif #ifdef CONFIG_AS_AVX2 - if (poly1305_use_avx2 && have_simd) + if (poly1305_use_avx2 && simd_context == HAVE_FULL_SIMD) poly1305_emit_avx(ctx, mac, nonce); else #endif #ifdef CONFIG_AS_AVX - if (poly1305_use_avx && have_simd) + if (poly1305_use_avx && simd_context == HAVE_FULL_SIMD) poly1305_emit_avx(ctx, mac, nonce); else #endif poly1305_emit_x86_64(ctx, mac, nonce); #elif defined(CONFIG_ARM) || defined(CONFIG_ARM64) #if defined(ARM_USE_NEON) - if (poly1305_use_neon && have_simd) + if (poly1305_use_neon && simd_context == HAVE_FULL_SIMD) poly1305_emit_neon(ctx, mac, nonce); else #endif @@ -322,7 +323,7 @@ static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE], const u32 #endif } -void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool have_simd) +void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, simd_context_t simd_context) { const size_t num = ctx->num % POLY1305_BLOCK_SIZE; size_t rem; @@ -331,7 +332,7 @@ void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool h rem = POLY1305_BLOCK_SIZE - num; if (len >= rem) { memcpy(ctx->data + num, inp, rem); - poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1, have_simd); + poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1, simd_context); inp += rem; len -= rem; } else { @@ -346,7 +347,7 @@ void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool h len -= rem; if (len >= POLY1305_BLOCK_SIZE) { - poly1305_blocks(ctx->opaque, inp, len, 1, have_simd); + poly1305_blocks(ctx->opaque, inp, len, 1, simd_context); inp += len; } @@ -356,7 +357,7 @@ void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool h ctx->num = rem; } -void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], bool have_simd) +void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], simd_context_t simd_context) { size_t num = ctx->num % POLY1305_BLOCK_SIZE; @@ -364,10 +365,10 @@ void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], bool h ctx->data[num++] = 1; /* pad bit */ while (num < POLY1305_BLOCK_SIZE) ctx->data[num++] = 0; - poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0, have_simd); + poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0, simd_context); } - poly1305_emit(ctx->opaque, mac, ctx->nonce, have_simd); + poly1305_emit(ctx->opaque, mac, ctx->nonce, simd_context); /* zero out the state */ memzero_explicit(ctx, sizeof(*ctx)); diff --git a/src/crypto/poly1305.h b/src/crypto/poly1305.h index 876234c..f8467bc 100644 --- a/src/crypto/poly1305.h +++ b/src/crypto/poly1305.h @@ -6,6 +6,7 @@ #ifndef _WG_POLY1305_H #define _WG_POLY1305_H +#include "simd.h" #include <linux/types.h> enum poly1305_lengths { @@ -23,9 +24,9 @@ struct poly1305_ctx { void poly1305_fpu_init(void); -void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], bool have_simd); -void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, const size_t len, bool have_simd); -void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], bool have_simd); +void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], simd_context_t simd_context); +void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, const size_t len, simd_context_t simd_context); +void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], simd_context_t simd_context); #ifdef DEBUG bool poly1305_selftest(void); diff --git a/src/crypto/simd.h b/src/crypto/simd.h index 007f66e..6adf0c3 100644 --- a/src/crypto/simd.h +++ b/src/crypto/simd.h @@ -16,7 +16,12 @@ #include <asm/simd.h> #endif -static inline bool simd_get(void) +typedef enum { + HAVE_NO_SIMD, + HAVE_FULL_SIMD +} simd_context_t; + +static inline simd_context_t simd_get(void) { bool have_simd = false; #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) && !defined(CONFIG_PREEMPT_RT_BASE) @@ -32,29 +37,29 @@ static inline bool simd_get(void) if (have_simd) kernel_neon_begin(); #endif - return have_simd; + return have_simd ? HAVE_FULL_SIMD : HAVE_NO_SIMD; } -static inline void simd_put(bool was_on) +static inline void simd_put(simd_context_t prior_context) { #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) && !defined(CONFIG_PREEMPT_RT_BASE) - if (was_on) + if (prior_context != HAVE_NO_SIMD) kernel_fpu_end(); #elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !defined(CONFIG_PREEMPT_RT_BASE) - if (was_on) + if (prior_context != HAVE_NO_SIMD) kernel_neon_end(); #endif } -static inline bool simd_relax(bool was_on) +static inline simd_context_t simd_relax(simd_context_t prior_context) { #ifdef CONFIG_PREEMPT - if (was_on && need_resched()) { - simd_put(true); + if (prior_context != HAVE_NO_SIMD && need_resched()) { + simd_put(prior_context); return simd_get(); } #endif - return was_on; + return prior_context; } #endif /* _WG_SIMD_H */ |