diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/crypto/chacha20.c | 8 | ||||
-rw-r--r-- | src/crypto/chacha20.h | 5 | ||||
-rw-r--r-- | src/crypto/chacha20poly1305.c | 121 | ||||
-rw-r--r-- | src/crypto/chacha20poly1305.h | 5 | ||||
-rw-r--r-- | src/crypto/poly1305.c | 35 | ||||
-rw-r--r-- | src/crypto/poly1305.h | 7 | ||||
-rw-r--r-- | src/crypto/simd.h | 23 | ||||
-rw-r--r-- | src/receive.c | 12 | ||||
-rw-r--r-- | src/selftest/chacha20poly1305.h | 36 | ||||
-rw-r--r-- | src/selftest/poly1305.h | 36 | ||||
-rw-r--r-- | src/send.c | 12 |
11 files changed, 153 insertions, 147 deletions
diff --git a/src/crypto/chacha20.c b/src/crypto/chacha20.c index a65ec8b..29f7e6b 100644 --- a/src/crypto/chacha20.c +++ b/src/crypto/chacha20.c @@ -151,9 +151,9 @@ static void chacha20_generic(u8 *out, const u8 *in, u32 len, const u32 key[8], c } } -void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, bool have_simd) +void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, simd_context_t simd_context) { - if (!have_simd + if (simd_context != HAVE_FULL_SIMD #if defined(CONFIG_X86_64) || !chacha20_use_ssse3 @@ -227,10 +227,10 @@ static void hchacha20_generic(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[ out[7] = cpu_to_le32(x[15]); } -void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[HCHACHA20_NONCE_SIZE], const u8 key[HCHACHA20_KEY_SIZE], bool have_simd) +void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[HCHACHA20_NONCE_SIZE], const u8 key[HCHACHA20_KEY_SIZE], simd_context_t simd_context) { #if defined(CONFIG_X86_64) && defined(CONFIG_AS_SSSE3) - if (have_simd && chacha20_use_ssse3) { + if (simd_context == HAVE_FULL_SIMD && chacha20_use_ssse3) { hchacha20_ssse3(derived_key, nonce, key); return; } diff --git a/src/crypto/chacha20.h b/src/crypto/chacha20.h index 01fb99a..86ea4e3 100644 --- a/src/crypto/chacha20.h +++ b/src/crypto/chacha20.h @@ -6,6 +6,7 @@ #ifndef _WG_CHACHA20_H #define _WG_CHACHA20_H +#include "simd.h" #include <linux/kernel.h> #include <linux/types.h> @@ -39,8 +40,8 @@ static inline void chacha20_init(struct chacha20_ctx *state, const u8 key[CHACHA state->counter[2] = nonce & U32_MAX; state->counter[3] = nonce >> 32; } -void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, bool have_simd); +void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, simd_context_t simd_context); -void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[HCHACHA20_NONCE_SIZE], const u8 key[HCHACHA20_KEY_SIZE], bool have_simd); +void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[HCHACHA20_NONCE_SIZE], const u8 key[HCHACHA20_KEY_SIZE], simd_context_t simd_context); #endif /* _WG_CHACHA20_H */ diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c index b47f6a5..30d5444 100644 --- a/src/crypto/chacha20poly1305.c +++ b/src/crypto/chacha20poly1305.c @@ -6,7 +6,6 @@ #include "chacha20poly1305.h" #include "chacha20.h" #include "poly1305.h" -#include "simd.h" #include <linux/kernel.h> #include <crypto/scatterwalk.h> @@ -29,7 +28,7 @@ static struct blkcipher_desc chacha20_desc = { static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - bool have_simd) + simd_context_t simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; @@ -39,22 +38,22 @@ static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size } b = {{ 0 }}; chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd); - poly1305_init(&poly1305_state, b.block0, have_simd); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + poly1305_init(&poly1305_state, b.block0, simd_context); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); - chacha20(&chacha20_state, dst, src, src_len, have_simd); + chacha20(&chacha20_state, dst, src, src_len, simd_context); - poly1305_update(&poly1305_state, dst, src_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, dst, src_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(src_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); - poly1305_finish(&poly1305_state, dst + src_len, have_simd); + poly1305_finish(&poly1305_state, dst + src_len, simd_context); memzero_explicit(&chacha20_state, sizeof(chacha20_state)); memzero_explicit(&b, sizeof(b)); @@ -64,17 +63,17 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]) { - bool have_simd; + simd_context_t simd_context; - have_simd = simd_get(); - __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd); - simd_put(have_simd); + simd_context = simd_get(); + __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, simd_context); + simd_put(simd_context); } bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - bool have_simd) + simd_context_t simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; @@ -87,11 +86,11 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *sr } b = {{ 0 }}; chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd); - poly1305_init(&poly1305_state, b.block0, have_simd); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + poly1305_init(&poly1305_state, b.block0, simd_context); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); if (likely(src_len)) { blkcipher_walk_init(&walk, dst, src, src_len); @@ -99,26 +98,26 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *sr while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); - chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd); - poly1305_update(&poly1305_state, walk.dst.virt.addr, chunk_len, have_simd); + chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, simd_context); + poly1305_update(&poly1305_state, walk.dst.virt.addr, chunk_len, simd_context); ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE); } if (walk.nbytes) { - chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd); - poly1305_update(&poly1305_state, walk.dst.virt.addr, walk.nbytes, have_simd); + chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, simd_context); + poly1305_update(&poly1305_state, walk.dst.virt.addr, walk.nbytes, simd_context); ret = blkcipher_walk_done(&chacha20_desc, &walk, 0); } } if (unlikely(ret)) goto err; - poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(src_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); - poly1305_finish(&poly1305_state, b.mac, have_simd); + poly1305_finish(&poly1305_state, b.mac, simd_context); scatterwalk_map_and_copy(b.mac, dst, src_len, sizeof(b.mac), 1); err: memzero_explicit(&chacha20_state, sizeof(chacha20_state)); @@ -129,7 +128,7 @@ err: static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - bool have_simd) + simd_context_t simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; @@ -145,25 +144,25 @@ static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size return false; chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd); - poly1305_init(&poly1305_state, b.block0, have_simd); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + poly1305_init(&poly1305_state, b.block0, simd_context); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); dst_len = src_len - POLY1305_MAC_SIZE; - poly1305_update(&poly1305_state, src, dst_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, src, dst_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(dst_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); - poly1305_finish(&poly1305_state, b.mac, have_simd); + poly1305_finish(&poly1305_state, b.mac, simd_context); ret = crypto_memneq(b.mac, src + dst_len, POLY1305_MAC_SIZE); if (likely(!ret)) - chacha20(&chacha20_state, dst, src, dst_len, have_simd); + chacha20(&chacha20_state, dst, src, dst_len, simd_context); memzero_explicit(&chacha20_state, sizeof(chacha20_state)); memzero_explicit(&b, sizeof(b)); @@ -175,18 +174,18 @@ bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]) { - bool have_simd, ret; + simd_context_t simd_context, ret; - have_simd = simd_get(); - ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd); - simd_put(have_simd); + simd_context = simd_get(); + ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, simd_context); + simd_put(simd_context); return ret; } bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - bool have_simd) + simd_context_t simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; @@ -206,11 +205,11 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr return false; chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd); - poly1305_init(&poly1305_state, b.block0, have_simd); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + poly1305_init(&poly1305_state, b.block0, simd_context); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); dst_len = src_len - POLY1305_MAC_SIZE; if (likely(dst_len)) { @@ -219,26 +218,26 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); - poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len, have_simd); - chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd); + poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len, simd_context); + chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, simd_context); ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE); } if (walk.nbytes) { - poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes, have_simd); - chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd); + poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes, simd_context); + chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, simd_context); ret = blkcipher_walk_done(&chacha20_desc, &walk, 0); } } if (unlikely(ret)) goto err; - poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, have_simd); + poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(dst_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); - poly1305_finish(&poly1305_state, b.computed_mac, have_simd); + poly1305_finish(&poly1305_state, b.computed_mac, simd_context); scatterwalk_map_and_copy(b.read_mac, src, dst_len, POLY1305_MAC_SIZE, 0); ret = crypto_memneq(b.read_mac, b.computed_mac, POLY1305_MAC_SIZE); @@ -253,13 +252,13 @@ void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 nonce[XCHACHA20POLY1305_NONCELEN], const u8 key[CHACHA20POLY1305_KEYLEN]) { - bool have_simd = simd_get(); + simd_context_t simd_context = simd_get(); u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16); - hchacha20(derived_key, nonce, key, have_simd); - __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, have_simd); + hchacha20(derived_key, nonce, key, simd_context); + __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, simd_context); memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN); - simd_put(have_simd); + simd_put(simd_context); } bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, @@ -267,13 +266,13 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 nonce[XCHACHA20POLY1305_NONCELEN], const u8 key[CHACHA20POLY1305_KEYLEN]) { - bool ret, have_simd = simd_get(); + bool ret, simd_context = simd_get(); u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16); - hchacha20(derived_key, nonce, key, have_simd); - ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, have_simd); + hchacha20(derived_key, nonce, key, simd_context); + ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, simd_context); memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN); - simd_put(have_simd); + simd_put(simd_context); return ret; } diff --git a/src/crypto/chacha20poly1305.h b/src/crypto/chacha20poly1305.h index 43b0a17..eb846c6 100644 --- a/src/crypto/chacha20poly1305.h +++ b/src/crypto/chacha20poly1305.h @@ -6,6 +6,7 @@ #ifndef _WG_CHACHA20POLY1305_H #define _WG_CHACHA20POLY1305_H +#include "simd.h" #include <linux/types.h> struct scatterlist; @@ -23,7 +24,7 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, bool __must_check chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - bool have_simd); + simd_context_t simd_context); bool __must_check chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, @@ -32,7 +33,7 @@ bool __must_check chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t bool __must_check chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - bool have_simd); + simd_context_t simd_context); void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, diff --git a/src/crypto/poly1305.c b/src/crypto/poly1305.c index 65a37d9..be2eb33 100644 --- a/src/crypto/poly1305.c +++ b/src/crypto/poly1305.c @@ -5,6 +5,7 @@ */ #include "poly1305.h" +#include "simd.h" #include <linux/kernel.h> #include <linux/string.h> @@ -237,7 +238,7 @@ static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4]) } #endif -void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], bool have_simd) +void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], simd_context_t simd_context) { ctx->nonce[0] = le32_to_cpup((__le32 *)&key[16]); ctx->nonce[1] = le32_to_cpup((__le32 *)&key[20]); @@ -256,28 +257,28 @@ void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], bo ctx->num = 0; } -static inline void poly1305_blocks(void *ctx, const u8 *inp, const size_t len, const u32 padbit, bool have_simd) +static inline void poly1305_blocks(void *ctx, const u8 *inp, const size_t len, const u32 padbit, simd_context_t simd_context) { #if defined(CONFIG_X86_64) #ifdef CONFIG_AS_AVX512 - if (poly1305_use_avx512 && have_simd) + if (poly1305_use_avx512 && simd_context == HAVE_FULL_SIMD) poly1305_blocks_avx512(ctx, inp, len, padbit); else #endif #ifdef CONFIG_AS_AVX2 - if (poly1305_use_avx2 && have_simd) + if (poly1305_use_avx2 && simd_context == HAVE_FULL_SIMD) poly1305_blocks_avx2(ctx, inp, len, padbit); else #endif #ifdef CONFIG_AS_AVX - if (poly1305_use_avx && have_simd) + if (poly1305_use_avx && simd_context == HAVE_FULL_SIMD) poly1305_blocks_avx(ctx, inp, len, padbit); else #endif poly1305_blocks_x86_64(ctx, inp, len, padbit); #elif defined(CONFIG_ARM) || defined(CONFIG_ARM64) #if defined(ARM_USE_NEON) - if (poly1305_use_neon && have_simd) + if (poly1305_use_neon && simd_context == HAVE_FULL_SIMD) poly1305_blocks_neon(ctx, inp, len, padbit); else #endif @@ -289,28 +290,28 @@ static inline void poly1305_blocks(void *ctx, const u8 *inp, const size_t len, c #endif } -static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE], const u32 nonce[4], bool have_simd) +static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE], const u32 nonce[4], simd_context_t simd_context) { #if defined(CONFIG_X86_64) #ifdef CONFIG_AS_AVX512 - if (poly1305_use_avx512 && have_simd) + if (poly1305_use_avx512 && simd_context == HAVE_FULL_SIMD) poly1305_emit_avx(ctx, mac, nonce); else #endif #ifdef CONFIG_AS_AVX2 - if (poly1305_use_avx2 && have_simd) + if (poly1305_use_avx2 && simd_context == HAVE_FULL_SIMD) poly1305_emit_avx(ctx, mac, nonce); else #endif #ifdef CONFIG_AS_AVX - if (poly1305_use_avx && have_simd) + if (poly1305_use_avx && simd_context == HAVE_FULL_SIMD) poly1305_emit_avx(ctx, mac, nonce); else #endif poly1305_emit_x86_64(ctx, mac, nonce); #elif defined(CONFIG_ARM) || defined(CONFIG_ARM64) #if defined(ARM_USE_NEON) - if (poly1305_use_neon && have_simd) + if (poly1305_use_neon && simd_context == HAVE_FULL_SIMD) poly1305_emit_neon(ctx, mac, nonce); else #endif @@ -322,7 +323,7 @@ static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE], const u32 #endif } -void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool have_simd) +void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, simd_context_t simd_context) { const size_t num = ctx->num % POLY1305_BLOCK_SIZE; size_t rem; @@ -331,7 +332,7 @@ void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool h rem = POLY1305_BLOCK_SIZE - num; if (len >= rem) { memcpy(ctx->data + num, inp, rem); - poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1, have_simd); + poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1, simd_context); inp += rem; len -= rem; } else { @@ -346,7 +347,7 @@ void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool h len -= rem; if (len >= POLY1305_BLOCK_SIZE) { - poly1305_blocks(ctx->opaque, inp, len, 1, have_simd); + poly1305_blocks(ctx->opaque, inp, len, 1, simd_context); inp += len; } @@ -356,7 +357,7 @@ void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool h ctx->num = rem; } -void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], bool have_simd) +void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], simd_context_t simd_context) { size_t num = ctx->num % POLY1305_BLOCK_SIZE; @@ -364,10 +365,10 @@ void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], bool h ctx->data[num++] = 1; /* pad bit */ while (num < POLY1305_BLOCK_SIZE) ctx->data[num++] = 0; - poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0, have_simd); + poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0, simd_context); } - poly1305_emit(ctx->opaque, mac, ctx->nonce, have_simd); + poly1305_emit(ctx->opaque, mac, ctx->nonce, simd_context); /* zero out the state */ memzero_explicit(ctx, sizeof(*ctx)); diff --git a/src/crypto/poly1305.h b/src/crypto/poly1305.h index 876234c..f8467bc 100644 --- a/src/crypto/poly1305.h +++ b/src/crypto/poly1305.h @@ -6,6 +6,7 @@ #ifndef _WG_POLY1305_H #define _WG_POLY1305_H +#include "simd.h" #include <linux/types.h> enum poly1305_lengths { @@ -23,9 +24,9 @@ struct poly1305_ctx { void poly1305_fpu_init(void); -void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], bool have_simd); -void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, const size_t len, bool have_simd); -void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], bool have_simd); +void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], simd_context_t simd_context); +void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, const size_t len, simd_context_t simd_context); +void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], simd_context_t simd_context); #ifdef DEBUG bool poly1305_selftest(void); diff --git a/src/crypto/simd.h b/src/crypto/simd.h index 007f66e..6adf0c3 100644 --- a/src/crypto/simd.h +++ b/src/crypto/simd.h @@ -16,7 +16,12 @@ #include <asm/simd.h> #endif -static inline bool simd_get(void) +typedef enum { + HAVE_NO_SIMD, + HAVE_FULL_SIMD +} simd_context_t; + +static inline simd_context_t simd_get(void) { bool have_simd = false; #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) && !defined(CONFIG_PREEMPT_RT_BASE) @@ -32,29 +37,29 @@ static inline bool simd_get(void) if (have_simd) kernel_neon_begin(); #endif - return have_simd; + return have_simd ? HAVE_FULL_SIMD : HAVE_NO_SIMD; } -static inline void simd_put(bool was_on) +static inline void simd_put(simd_context_t prior_context) { #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) && !defined(CONFIG_PREEMPT_RT_BASE) - if (was_on) + if (prior_context != HAVE_NO_SIMD) kernel_fpu_end(); #elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !defined(CONFIG_PREEMPT_RT_BASE) - if (was_on) + if (prior_context != HAVE_NO_SIMD) kernel_neon_end(); #endif } -static inline bool simd_relax(bool was_on) +static inline simd_context_t simd_relax(simd_context_t prior_context) { #ifdef CONFIG_PREEMPT - if (was_on && need_resched()) { - simd_put(true); + if (prior_context != HAVE_NO_SIMD && need_resched()) { + simd_put(prior_context); return simd_get(); } #endif - return was_on; + return prior_context; } #endif /* _WG_SIMD_H */ diff --git a/src/receive.c b/src/receive.c index d3a698a..4e73da1 100644 --- a/src/receive.c +++ b/src/receive.c @@ -200,7 +200,7 @@ static inline void keep_key_fresh(struct wireguard_peer *peer) } } -static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key *key, bool have_simd) +static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key *key, simd_context_t simd_context) { struct scatterlist sg[MAX_SKB_FRAGS * 2 + 1]; struct sk_buff *trailer; @@ -233,7 +233,7 @@ static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key * if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0) return false; - if (!chacha20poly1305_decrypt_sg(sg, sg, skb->len, NULL, 0, PACKET_CB(skb)->nonce, key->key, have_simd)) + if (!chacha20poly1305_decrypt_sg(sg, sg, skb->len, NULL, 0, PACKET_CB(skb)->nonce, key->key, simd_context)) return false; /* Another ugly situation of pushing and pulling the header so as to @@ -423,15 +423,15 @@ void packet_decrypt_worker(struct work_struct *work) { struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr; struct sk_buff *skb; - bool have_simd = simd_get(); + simd_context_t simd_context = simd_get(); while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { - enum packet_state state = likely(skb_decrypt(skb, &PACKET_CB(skb)->keypair->receiving, have_simd)) ? PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; + enum packet_state state = likely(skb_decrypt(skb, &PACKET_CB(skb)->keypair->receiving, simd_context)) ? PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; queue_enqueue_per_peer_napi(&PACKET_PEER(skb)->rx_queue, skb, state); - have_simd = simd_relax(have_simd); + simd_context = simd_relax(simd_context); } - simd_put(have_simd); + simd_put(simd_context); } static void packet_consume_data(struct wireguard_device *wg, struct sk_buff *skb) diff --git a/src/selftest/chacha20poly1305.h b/src/selftest/chacha20poly1305.h index 51766d3..5835616 100644 --- a/src/selftest/chacha20poly1305.h +++ b/src/selftest/chacha20poly1305.h @@ -1427,7 +1427,7 @@ static const struct chacha20poly1305_testvec xchacha20poly1305_dec_vectors[] __i static inline void chacha20poly1305_selftest_encrypt_bignonce(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u8 nonce[12], const u8 key[CHACHA20POLY1305_KEYLEN]) { - bool have_simd = simd_get(); + simd_context_t simd_context = simd_get(); struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; union { @@ -1439,18 +1439,18 @@ static inline void chacha20poly1305_selftest_encrypt_bignonce(u8 *dst, const u8 chacha20_state.counter[1] = le32_to_cpu(*(__le32 *)(nonce + 0)); chacha20_state.counter[2] = le32_to_cpu(*(__le32 *)(nonce + 4)); chacha20_state.counter[3] = le32_to_cpu(*(__le32 *)(nonce + 8)); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd); - poly1305_init(&poly1305_state, b.block0, have_simd); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); - chacha20(&chacha20_state, dst, src, src_len, have_simd); - poly1305_update(&poly1305_state, dst, src_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + poly1305_init(&poly1305_state, b.block0, simd_context); + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); + chacha20(&chacha20_state, dst, src, src_len, simd_context); + poly1305_update(&poly1305_state, dst, src_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(src_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); - poly1305_finish(&poly1305_state, dst + src_len, have_simd); - simd_put(have_simd); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); + poly1305_finish(&poly1305_state, dst + src_len, simd_context); + simd_put(simd_context); memzero_explicit(&chacha20_state, sizeof(chacha20_state)); memzero_explicit(&b, sizeof(b)); } @@ -1478,7 +1478,7 @@ bool __init chacha20poly1305_selftest(void) { size_t i; u8 computed_result[MAXIMUM_TEST_BUFFER_LEN], *heap_src, *heap_dst; - bool success = true, ret, have_simd; + bool success = true, ret, simd_context; struct scatterlist sg_src, sg_dst; heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); @@ -1498,7 +1498,7 @@ bool __init chacha20poly1305_selftest(void) success = false; } } - have_simd = simd_get(); + simd_context = simd_get(); for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { if (chacha20poly1305_enc_vectors[i].nlen != 8) continue; @@ -1506,13 +1506,13 @@ bool __init chacha20poly1305_selftest(void) memcpy(heap_src, chacha20poly1305_enc_vectors[i].input, chacha20poly1305_enc_vectors[i].ilen); sg_init_one(&sg_src, heap_src, chacha20poly1305_enc_vectors[i].ilen); sg_init_one(&sg_dst, heap_dst, chacha20poly1305_enc_vectors[i].ilen + POLY1305_MAC_SIZE); - ret = chacha20poly1305_encrypt_sg(&sg_dst, &sg_src, chacha20poly1305_enc_vectors[i].ilen, chacha20poly1305_enc_vectors[i].assoc, chacha20poly1305_enc_vectors[i].alen, le64_to_cpup((__force __le64 *)chacha20poly1305_enc_vectors[i].nonce), chacha20poly1305_enc_vectors[i].key, have_simd); + ret = chacha20poly1305_encrypt_sg(&sg_dst, &sg_src, chacha20poly1305_enc_vectors[i].ilen, chacha20poly1305_enc_vectors[i].assoc, chacha20poly1305_enc_vectors[i].alen, le64_to_cpup((__force __le64 *)chacha20poly1305_enc_vectors[i].nonce), chacha20poly1305_enc_vectors[i].key, simd_context); if (!ret || memcmp(heap_dst, chacha20poly1305_enc_vectors[i].result, chacha20poly1305_enc_vectors[i].ilen + POLY1305_MAC_SIZE)) { pr_info("chacha20poly1305 sg encryption self-test %zu: FAIL\n", i + 1); success = false; } } - simd_put(have_simd); + simd_put(simd_context); for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { memset(computed_result, 0, sizeof(computed_result)); ret = chacha20poly1305_decrypt(computed_result, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, le64_to_cpu(*(__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key); @@ -1521,19 +1521,19 @@ bool __init chacha20poly1305_selftest(void) success = false; } } - have_simd = simd_get(); + simd_context = simd_get(); for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { memset(heap_dst, 0, MAXIMUM_TEST_BUFFER_LEN); memcpy(heap_src, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen); sg_init_one(&sg_src, heap_src, chacha20poly1305_dec_vectors[i].ilen); sg_init_one(&sg_dst, heap_dst, chacha20poly1305_dec_vectors[i].ilen - POLY1305_MAC_SIZE); - ret = chacha20poly1305_decrypt_sg(&sg_dst, &sg_src, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, le64_to_cpup((__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key, have_simd); + ret = chacha20poly1305_decrypt_sg(&sg_dst, &sg_src, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, le64_to_cpup((__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key, simd_context); if (!decryption_success(ret, chacha20poly1305_dec_vectors[i].failure, memcmp(heap_dst, chacha20poly1305_dec_vectors[i].result, chacha20poly1305_dec_vectors[i].ilen - POLY1305_MAC_SIZE))) { pr_info("chacha20poly1305 sg decryption self-test %zu: FAIL\n", i + 1); success = false; } } - simd_put(have_simd); + simd_put(simd_context); for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) { memset(computed_result, 0, sizeof(computed_result)); xchacha20poly1305_encrypt(computed_result, xchacha20poly1305_enc_vectors[i].input, xchacha20poly1305_enc_vectors[i].ilen, xchacha20poly1305_enc_vectors[i].assoc, xchacha20poly1305_enc_vectors[i].alen, xchacha20poly1305_enc_vectors[i].nonce, xchacha20poly1305_enc_vectors[i].key); diff --git a/src/selftest/poly1305.h b/src/selftest/poly1305.h index d1b7227..0ff2301 100644 --- a/src/selftest/poly1305.h +++ b/src/selftest/poly1305.h @@ -6,8 +6,6 @@ #ifdef DEBUG -#include "../crypto/simd.h" - struct poly1305_testdata { size_t size; const u8 data[1024]; @@ -1489,7 +1487,7 @@ static const struct poly1305_testvec poly1305_testvecs[] = { bool __init poly1305_selftest(void) { - bool have_simd = simd_get(); + simd_context_t simd_context = simd_get(); bool success = true; size_t i; @@ -1509,9 +1507,9 @@ bool __init poly1305_selftest(void) memset(out, 0, sizeof(out)); memset(&poly1305, 0, sizeof(poly1305)); - poly1305_init(&poly1305, key, have_simd); - poly1305_update(&poly1305, in, inlen, have_simd); - poly1305_finish(&poly1305, out, have_simd); + poly1305_init(&poly1305, key, simd_context); + poly1305_update(&poly1305, in, inlen, simd_context); + poly1305_finish(&poly1305, out, simd_context); if (memcmp(out, expected, expectedlen)) { pr_info("poly1305 self-test %zu: FAIL\n", i + 1); success = false; @@ -1520,10 +1518,10 @@ bool __init poly1305_selftest(void) if (inlen > 16) { memset(out, 0, sizeof(out)); memset(&poly1305, 0, sizeof(poly1305)); - poly1305_init(&poly1305, key, have_simd); - poly1305_update(&poly1305, in, 1, have_simd); - poly1305_update(&poly1305, in + 1, inlen - 1, have_simd); - poly1305_finish(&poly1305, out, have_simd); + poly1305_init(&poly1305, key, simd_context); + poly1305_update(&poly1305, in, 1, simd_context); + poly1305_update(&poly1305, in + 1, inlen - 1, simd_context); + poly1305_finish(&poly1305, out, simd_context); if (memcmp(out, expected, expectedlen)) { pr_info("poly1305 self-test %zu/1+(N-1): FAIL\n", i + 1); success = false; @@ -1535,10 +1533,10 @@ bool __init poly1305_selftest(void) memset(out, 0, sizeof(out)); memset(&poly1305, 0, sizeof(poly1305)); - poly1305_init(&poly1305, key, have_simd); - poly1305_update(&poly1305, in, half, have_simd); - poly1305_update(&poly1305, in + half, inlen - half, have_simd); - poly1305_finish(&poly1305, out, have_simd); + poly1305_init(&poly1305, key, simd_context); + poly1305_update(&poly1305, in, half, simd_context); + poly1305_update(&poly1305, in + half, inlen - half, simd_context); + poly1305_finish(&poly1305, out, simd_context); if (memcmp(out, expected, expectedlen)) { pr_info("poly1305 self-test %zu/2: FAIL\n", i + 1); success = false; @@ -1547,10 +1545,10 @@ bool __init poly1305_selftest(void) for (half = 16; half < inlen; half += 16) { memset(out, 0, sizeof(out)); memset(&poly1305, 0, sizeof(poly1305)); - poly1305_init(&poly1305, key, have_simd); - poly1305_update(&poly1305, in, half, have_simd); - poly1305_update(&poly1305, in + half, inlen - half, have_simd); - poly1305_finish(&poly1305, out, have_simd); + poly1305_init(&poly1305, key, simd_context); + poly1305_update(&poly1305, in, half, simd_context); + poly1305_update(&poly1305, in + half, inlen - half, simd_context); + poly1305_finish(&poly1305, out, simd_context); if (memcmp(out, expected, expectedlen)) { pr_info("poly1305 self-test %zu/%zu+%zu: FAIL\n", i + 1, half, inlen - half); success = false; @@ -1558,7 +1556,7 @@ bool __init poly1305_selftest(void) } } } - simd_put(have_simd); + simd_put(simd_context); if (success) pr_info("poly1305 self-tests: pass\n"); @@ -126,7 +126,7 @@ static inline unsigned int skb_padding(struct sk_buff *skb) return padded_size - last_unit; } -static inline bool skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypair, bool have_simd) +static inline bool skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypair, simd_context_t simd_context) { struct scatterlist sg[MAX_SKB_FRAGS * 2 + 1]; struct message_data *header; @@ -167,7 +167,7 @@ static inline bool skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypai sg_init_table(sg, num_frags); if (skb_to_sgvec(skb, sg, sizeof(struct message_data), noise_encrypted_len(plaintext_len)) <= 0) return false; - return chacha20poly1305_encrypt_sg(sg, sg, plaintext_len, NULL, 0, PACKET_CB(skb)->nonce, keypair->sending.key, have_simd); + return chacha20poly1305_encrypt_sg(sg, sg, plaintext_len, NULL, 0, PACKET_CB(skb)->nonce, keypair->sending.key, simd_context); } void packet_send_keepalive(struct wireguard_peer *peer) @@ -243,13 +243,13 @@ void packet_encrypt_worker(struct work_struct *work) { struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr; struct sk_buff *first, *skb, *next; - bool have_simd = simd_get(); + simd_context_t simd_context = simd_get(); while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) { enum packet_state state = PACKET_STATE_CRYPTED; skb_walk_null_queue_safe(first, skb, next) { - if (likely(skb_encrypt(skb, PACKET_CB(first)->keypair, have_simd))) + if (likely(skb_encrypt(skb, PACKET_CB(first)->keypair, simd_context))) skb_reset(skb); else { state = PACKET_STATE_DEAD; @@ -258,9 +258,9 @@ void packet_encrypt_worker(struct work_struct *work) } queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, state); - have_simd = simd_relax(have_simd); + simd_context = simd_relax(simd_context); } - simd_put(have_simd); + simd_put(simd_context); } static void packet_create_data(struct sk_buff *first) |