summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--  src/crypto/chacha20.c            8
-rw-r--r--  src/crypto/chacha20.h            5
-rw-r--r--  src/crypto/chacha20poly1305.c  121
-rw-r--r--  src/crypto/chacha20poly1305.h    5
-rw-r--r--  src/crypto/poly1305.c           35
-rw-r--r--  src/crypto/poly1305.h            7
-rw-r--r--  src/crypto/simd.h               23
-rw-r--r--  src/receive.c                   12
-rw-r--r--  src/selftest/chacha20poly1305.h 36
-rw-r--r--  src/selftest/poly1305.h         36
-rw-r--r--  src/send.c                      12
11 files changed, 153 insertions, 147 deletions
diff --git a/src/crypto/chacha20.c b/src/crypto/chacha20.c
index a65ec8b..29f7e6b 100644
--- a/src/crypto/chacha20.c
+++ b/src/crypto/chacha20.c
@@ -151,9 +151,9 @@ static void chacha20_generic(u8 *out, const u8 *in, u32 len, const u32 key[8], c
}
}
-void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, bool have_simd)
+void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, simd_context_t simd_context)
{
- if (!have_simd
+ if (simd_context != HAVE_FULL_SIMD
#if defined(CONFIG_X86_64)
|| !chacha20_use_ssse3
@@ -227,10 +227,10 @@ static void hchacha20_generic(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[
out[7] = cpu_to_le32(x[15]);
}
-void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[HCHACHA20_NONCE_SIZE], const u8 key[HCHACHA20_KEY_SIZE], bool have_simd)
+void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[HCHACHA20_NONCE_SIZE], const u8 key[HCHACHA20_KEY_SIZE], simd_context_t simd_context)
{
#if defined(CONFIG_X86_64) && defined(CONFIG_AS_SSSE3)
- if (have_simd && chacha20_use_ssse3) {
+ if (simd_context == HAVE_FULL_SIMD && chacha20_use_ssse3) {
hchacha20_ssse3(derived_key, nonce, key);
return;
}
diff --git a/src/crypto/chacha20.h b/src/crypto/chacha20.h
index 01fb99a..86ea4e3 100644
--- a/src/crypto/chacha20.h
+++ b/src/crypto/chacha20.h
@@ -6,6 +6,7 @@
#ifndef _WG_CHACHA20_H
#define _WG_CHACHA20_H
+#include "simd.h"
#include <linux/kernel.h>
#include <linux/types.h>
@@ -39,8 +40,8 @@ static inline void chacha20_init(struct chacha20_ctx *state, const u8 key[CHACHA
state->counter[2] = nonce & U32_MAX;
state->counter[3] = nonce >> 32;
}
-void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, bool have_simd);
+void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, simd_context_t simd_context);
-void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[HCHACHA20_NONCE_SIZE], const u8 key[HCHACHA20_KEY_SIZE], bool have_simd);
+void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[HCHACHA20_NONCE_SIZE], const u8 key[HCHACHA20_KEY_SIZE], simd_context_t simd_context);
#endif /* _WG_CHACHA20_H */
diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c
index b47f6a5..30d5444 100644
--- a/src/crypto/chacha20poly1305.c
+++ b/src/crypto/chacha20poly1305.c
@@ -6,7 +6,6 @@
#include "chacha20poly1305.h"
#include "chacha20.h"
#include "poly1305.h"
-#include "simd.h"
#include <linux/kernel.h>
#include <crypto/scatterwalk.h>
@@ -29,7 +28,7 @@ static struct blkcipher_desc chacha20_desc = {
static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
- bool have_simd)
+ simd_context_t simd_context)
{
struct poly1305_ctx poly1305_state;
struct chacha20_ctx chacha20_state;
@@ -39,22 +38,22 @@ static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size
} b = {{ 0 }};
chacha20_init(&chacha20_state, key, nonce);
- chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd);
- poly1305_init(&poly1305_state, b.block0, have_simd);
+ chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context);
+ poly1305_init(&poly1305_state, b.block0, simd_context);
- poly1305_update(&poly1305_state, ad, ad_len, have_simd);
- poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd);
+ poly1305_update(&poly1305_state, ad, ad_len, simd_context);
+ poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context);
- chacha20(&chacha20_state, dst, src, src_len, have_simd);
+ chacha20(&chacha20_state, dst, src, src_len, simd_context);
- poly1305_update(&poly1305_state, dst, src_len, have_simd);
- poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd);
+ poly1305_update(&poly1305_state, dst, src_len, simd_context);
+ poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context);
b.lens[0] = cpu_to_le64(ad_len);
b.lens[1] = cpu_to_le64(src_len);
- poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd);
+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context);
- poly1305_finish(&poly1305_state, dst + src_len, have_simd);
+ poly1305_finish(&poly1305_state, dst + src_len, simd_context);
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
memzero_explicit(&b, sizeof(b));
@@ -64,17 +63,17 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN])
{
- bool have_simd;
+ simd_context_t simd_context;
- have_simd = simd_get();
- __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd);
- simd_put(have_simd);
+ simd_context = simd_get();
+ __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, simd_context);
+ simd_put(simd_context);
}
bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
- bool have_simd)
+ simd_context_t simd_context)
{
struct poly1305_ctx poly1305_state;
struct chacha20_ctx chacha20_state;
@@ -87,11 +86,11 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *sr
} b = {{ 0 }};
chacha20_init(&chacha20_state, key, nonce);
- chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd);
- poly1305_init(&poly1305_state, b.block0, have_simd);
+ chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context);
+ poly1305_init(&poly1305_state, b.block0, simd_context);
- poly1305_update(&poly1305_state, ad, ad_len, have_simd);
- poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd);
+ poly1305_update(&poly1305_state, ad, ad_len, simd_context);
+ poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context);
if (likely(src_len)) {
blkcipher_walk_init(&walk, dst, src, src_len);
@@ -99,26 +98,26 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *sr
while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE);
- chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd);
- poly1305_update(&poly1305_state, walk.dst.virt.addr, chunk_len, have_simd);
+ chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, simd_context);
+ poly1305_update(&poly1305_state, walk.dst.virt.addr, chunk_len, simd_context);
ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE);
}
if (walk.nbytes) {
- chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd);
- poly1305_update(&poly1305_state, walk.dst.virt.addr, walk.nbytes, have_simd);
+ chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, simd_context);
+ poly1305_update(&poly1305_state, walk.dst.virt.addr, walk.nbytes, simd_context);
ret = blkcipher_walk_done(&chacha20_desc, &walk, 0);
}
}
if (unlikely(ret))
goto err;
- poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd);
+ poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context);
b.lens[0] = cpu_to_le64(ad_len);
b.lens[1] = cpu_to_le64(src_len);
- poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd);
+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context);
- poly1305_finish(&poly1305_state, b.mac, have_simd);
+ poly1305_finish(&poly1305_state, b.mac, simd_context);
scatterwalk_map_and_copy(b.mac, dst, src_len, sizeof(b.mac), 1);
err:
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
@@ -129,7 +128,7 @@ err:
static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
- bool have_simd)
+ simd_context_t simd_context)
{
struct poly1305_ctx poly1305_state;
struct chacha20_ctx chacha20_state;
@@ -145,25 +144,25 @@ static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size
return false;
chacha20_init(&chacha20_state, key, nonce);
- chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd);
- poly1305_init(&poly1305_state, b.block0, have_simd);
+ chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context);
+ poly1305_init(&poly1305_state, b.block0, simd_context);
- poly1305_update(&poly1305_state, ad, ad_len, have_simd);
- poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd);
+ poly1305_update(&poly1305_state, ad, ad_len, simd_context);
+ poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context);
dst_len = src_len - POLY1305_MAC_SIZE;
- poly1305_update(&poly1305_state, src, dst_len, have_simd);
- poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, have_simd);
+ poly1305_update(&poly1305_state, src, dst_len, simd_context);
+ poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, simd_context);
b.lens[0] = cpu_to_le64(ad_len);
b.lens[1] = cpu_to_le64(dst_len);
- poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd);
+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context);
- poly1305_finish(&poly1305_state, b.mac, have_simd);
+ poly1305_finish(&poly1305_state, b.mac, simd_context);
ret = crypto_memneq(b.mac, src + dst_len, POLY1305_MAC_SIZE);
if (likely(!ret))
- chacha20(&chacha20_state, dst, src, dst_len, have_simd);
+ chacha20(&chacha20_state, dst, src, dst_len, simd_context);
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
memzero_explicit(&b, sizeof(b));
@@ -175,18 +174,18 @@ bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN])
{
- bool have_simd, ret;
+ simd_context_t simd_context;
+ bool ret;
- have_simd = simd_get();
- ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd);
- simd_put(have_simd);
+ simd_context = simd_get();
+ ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, simd_context);
+ simd_put(simd_context);
return ret;
}
bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
- bool have_simd)
+ simd_context_t simd_context)
{
struct poly1305_ctx poly1305_state;
struct chacha20_ctx chacha20_state;
@@ -206,11 +205,11 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr
return false;
chacha20_init(&chacha20_state, key, nonce);
- chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd);
- poly1305_init(&poly1305_state, b.block0, have_simd);
+ chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context);
+ poly1305_init(&poly1305_state, b.block0, simd_context);
- poly1305_update(&poly1305_state, ad, ad_len, have_simd);
- poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd);
+ poly1305_update(&poly1305_state, ad, ad_len, simd_context);
+ poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context);
dst_len = src_len - POLY1305_MAC_SIZE;
if (likely(dst_len)) {
@@ -219,26 +218,26 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr
while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE);
- poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len, have_simd);
- chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd);
+ poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len, simd_context);
+ chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, simd_context);
ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE);
}
if (walk.nbytes) {
- poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes, have_simd);
- chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd);
+ poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes, simd_context);
+ chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, simd_context);
ret = blkcipher_walk_done(&chacha20_desc, &walk, 0);
}
}
if (unlikely(ret))
goto err;
- poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, have_simd);
+ poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, simd_context);
b.lens[0] = cpu_to_le64(ad_len);
b.lens[1] = cpu_to_le64(dst_len);
- poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd);
+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context);
- poly1305_finish(&poly1305_state, b.computed_mac, have_simd);
+ poly1305_finish(&poly1305_state, b.computed_mac, simd_context);
scatterwalk_map_and_copy(b.read_mac, src, dst_len, POLY1305_MAC_SIZE, 0);
ret = crypto_memneq(b.read_mac, b.computed_mac, POLY1305_MAC_SIZE);
@@ -253,13 +252,13 @@ void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 nonce[XCHACHA20POLY1305_NONCELEN],
const u8 key[CHACHA20POLY1305_KEYLEN])
{
- bool have_simd = simd_get();
+ simd_context_t simd_context = simd_get();
u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
- hchacha20(derived_key, nonce, key, have_simd);
- __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, have_simd);
+ hchacha20(derived_key, nonce, key, simd_context);
+ __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, simd_context);
memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN);
- simd_put(have_simd);
+ simd_put(simd_context);
}
bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
@@ -267,13 +266,13 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 nonce[XCHACHA20POLY1305_NONCELEN],
const u8 key[CHACHA20POLY1305_KEYLEN])
{
- bool ret, have_simd = simd_get();
+ simd_context_t simd_context = simd_get();
+ bool ret;
u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
- hchacha20(derived_key, nonce, key, have_simd);
- ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, have_simd);
+ hchacha20(derived_key, nonce, key, simd_context);
+ ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, simd_context);
memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN);
- simd_put(have_simd);
+ simd_put(simd_context);
return ret;
}
diff --git a/src/crypto/chacha20poly1305.h b/src/crypto/chacha20poly1305.h
index 43b0a17..eb846c6 100644
--- a/src/crypto/chacha20poly1305.h
+++ b/src/crypto/chacha20poly1305.h
@@ -6,6 +6,7 @@
#ifndef _WG_CHACHA20POLY1305_H
#define _WG_CHACHA20POLY1305_H
+#include "simd.h"
#include <linux/types.h>
struct scatterlist;
@@ -23,7 +24,7 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
bool __must_check chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
- bool have_simd);
+ simd_context_t simd_context);
bool __must_check chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
@@ -32,7 +33,7 @@ bool __must_check chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t
bool __must_check chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
- bool have_simd);
+ simd_context_t simd_context);
void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
diff --git a/src/crypto/poly1305.c b/src/crypto/poly1305.c
index 65a37d9..be2eb33 100644
--- a/src/crypto/poly1305.c
+++ b/src/crypto/poly1305.c
@@ -5,6 +5,7 @@
*/
#include "poly1305.h"
+#include "simd.h"
#include <linux/kernel.h>
#include <linux/string.h>
@@ -237,7 +238,7 @@ static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
}
#endif
-void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], bool have_simd)
+void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], simd_context_t simd_context)
{
ctx->nonce[0] = le32_to_cpup((__le32 *)&key[16]);
ctx->nonce[1] = le32_to_cpup((__le32 *)&key[20]);
@@ -256,28 +257,28 @@ void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], bo
ctx->num = 0;
}
-static inline void poly1305_blocks(void *ctx, const u8 *inp, const size_t len, const u32 padbit, bool have_simd)
+static inline void poly1305_blocks(void *ctx, const u8 *inp, const size_t len, const u32 padbit, simd_context_t simd_context)
{
#if defined(CONFIG_X86_64)
#ifdef CONFIG_AS_AVX512
- if (poly1305_use_avx512 && have_simd)
+ if (poly1305_use_avx512 && simd_context == HAVE_FULL_SIMD)
poly1305_blocks_avx512(ctx, inp, len, padbit);
else
#endif
#ifdef CONFIG_AS_AVX2
- if (poly1305_use_avx2 && have_simd)
+ if (poly1305_use_avx2 && simd_context == HAVE_FULL_SIMD)
poly1305_blocks_avx2(ctx, inp, len, padbit);
else
#endif
#ifdef CONFIG_AS_AVX
- if (poly1305_use_avx && have_simd)
+ if (poly1305_use_avx && simd_context == HAVE_FULL_SIMD)
poly1305_blocks_avx(ctx, inp, len, padbit);
else
#endif
poly1305_blocks_x86_64(ctx, inp, len, padbit);
#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
#if defined(ARM_USE_NEON)
- if (poly1305_use_neon && have_simd)
+ if (poly1305_use_neon && simd_context == HAVE_FULL_SIMD)
poly1305_blocks_neon(ctx, inp, len, padbit);
else
#endif
@@ -289,28 +290,28 @@ static inline void poly1305_blocks(void *ctx, const u8 *inp, const size_t len, c
#endif
}
-static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE], const u32 nonce[4], bool have_simd)
+static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE], const u32 nonce[4], simd_context_t simd_context)
{
#if defined(CONFIG_X86_64)
#ifdef CONFIG_AS_AVX512
- if (poly1305_use_avx512 && have_simd)
+ if (poly1305_use_avx512 && simd_context == HAVE_FULL_SIMD)
poly1305_emit_avx(ctx, mac, nonce);
else
#endif
#ifdef CONFIG_AS_AVX2
- if (poly1305_use_avx2 && have_simd)
+ if (poly1305_use_avx2 && simd_context == HAVE_FULL_SIMD)
poly1305_emit_avx(ctx, mac, nonce);
else
#endif
#ifdef CONFIG_AS_AVX
- if (poly1305_use_avx && have_simd)
+ if (poly1305_use_avx && simd_context == HAVE_FULL_SIMD)
poly1305_emit_avx(ctx, mac, nonce);
else
#endif
poly1305_emit_x86_64(ctx, mac, nonce);
#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
#if defined(ARM_USE_NEON)
- if (poly1305_use_neon && have_simd)
+ if (poly1305_use_neon && simd_context == HAVE_FULL_SIMD)
poly1305_emit_neon(ctx, mac, nonce);
else
#endif
@@ -322,7 +323,7 @@ static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE], const u32
#endif
}
-void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool have_simd)
+void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, simd_context_t simd_context)
{
const size_t num = ctx->num % POLY1305_BLOCK_SIZE;
size_t rem;
@@ -331,7 +332,7 @@ void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool h
rem = POLY1305_BLOCK_SIZE - num;
if (len >= rem) {
memcpy(ctx->data + num, inp, rem);
- poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1, have_simd);
+ poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1, simd_context);
inp += rem;
len -= rem;
} else {
@@ -346,7 +347,7 @@ void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool h
len -= rem;
if (len >= POLY1305_BLOCK_SIZE) {
- poly1305_blocks(ctx->opaque, inp, len, 1, have_simd);
+ poly1305_blocks(ctx->opaque, inp, len, 1, simd_context);
inp += len;
}
@@ -356,7 +357,7 @@ void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool h
ctx->num = rem;
}
-void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], bool have_simd)
+void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], simd_context_t simd_context)
{
size_t num = ctx->num % POLY1305_BLOCK_SIZE;
@@ -364,10 +365,10 @@ void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], bool h
ctx->data[num++] = 1; /* pad bit */
while (num < POLY1305_BLOCK_SIZE)
ctx->data[num++] = 0;
- poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0, have_simd);
+ poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0, simd_context);
}
- poly1305_emit(ctx->opaque, mac, ctx->nonce, have_simd);
+ poly1305_emit(ctx->opaque, mac, ctx->nonce, simd_context);
/* zero out the state */
memzero_explicit(ctx, sizeof(*ctx));
diff --git a/src/crypto/poly1305.h b/src/crypto/poly1305.h
index 876234c..f8467bc 100644
--- a/src/crypto/poly1305.h
+++ b/src/crypto/poly1305.h
@@ -6,6 +6,7 @@
#ifndef _WG_POLY1305_H
#define _WG_POLY1305_H
+#include "simd.h"
#include <linux/types.h>
enum poly1305_lengths {
@@ -23,9 +24,9 @@ struct poly1305_ctx {
void poly1305_fpu_init(void);
-void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], bool have_simd);
-void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, const size_t len, bool have_simd);
-void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], bool have_simd);
+void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], simd_context_t simd_context);
+void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, const size_t len, simd_context_t simd_context);
+void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], simd_context_t simd_context);
#ifdef DEBUG
bool poly1305_selftest(void);
diff --git a/src/crypto/simd.h b/src/crypto/simd.h
index 007f66e..6adf0c3 100644
--- a/src/crypto/simd.h
+++ b/src/crypto/simd.h
@@ -16,7 +16,12 @@
#include <asm/simd.h>
#endif
-static inline bool simd_get(void)
+typedef enum {
+ HAVE_NO_SIMD,
+ HAVE_FULL_SIMD
+} simd_context_t;
+
+static inline simd_context_t simd_get(void)
{
bool have_simd = false;
#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) && !defined(CONFIG_PREEMPT_RT_BASE)
@@ -32,29 +37,29 @@ static inline bool simd_get(void)
if (have_simd)
kernel_neon_begin();
#endif
- return have_simd;
+ return have_simd ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
}
-static inline void simd_put(bool was_on)
+static inline void simd_put(simd_context_t prior_context)
{
#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) && !defined(CONFIG_PREEMPT_RT_BASE)
- if (was_on)
+ if (prior_context != HAVE_NO_SIMD)
kernel_fpu_end();
#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !defined(CONFIG_PREEMPT_RT_BASE)
- if (was_on)
+ if (prior_context != HAVE_NO_SIMD)
kernel_neon_end();
#endif
}
-static inline bool simd_relax(bool was_on)
+static inline simd_context_t simd_relax(simd_context_t prior_context)
{
#ifdef CONFIG_PREEMPT
- if (was_on && need_resched()) {
- simd_put(true);
+ if (prior_context != HAVE_NO_SIMD && need_resched()) {
+ simd_put(prior_context);
return simd_get();
}
#endif
- return was_on;
+ return prior_context;
}
#endif /* _WG_SIMD_H */
diff --git a/src/receive.c b/src/receive.c
index d3a698a..4e73da1 100644
--- a/src/receive.c
+++ b/src/receive.c
@@ -200,7 +200,7 @@ static inline void keep_key_fresh(struct wireguard_peer *peer)
}
}
-static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key *key, bool have_simd)
+static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key *key, simd_context_t simd_context)
{
struct scatterlist sg[MAX_SKB_FRAGS * 2 + 1];
struct sk_buff *trailer;
@@ -233,7 +233,7 @@ static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key *
if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0)
return false;
- if (!chacha20poly1305_decrypt_sg(sg, sg, skb->len, NULL, 0, PACKET_CB(skb)->nonce, key->key, have_simd))
+ if (!chacha20poly1305_decrypt_sg(sg, sg, skb->len, NULL, 0, PACKET_CB(skb)->nonce, key->key, simd_context))
return false;
/* Another ugly situation of pushing and pulling the header so as to
@@ -423,15 +423,15 @@ void packet_decrypt_worker(struct work_struct *work)
{
struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr;
struct sk_buff *skb;
- bool have_simd = simd_get();
+ simd_context_t simd_context = simd_get();
while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
- enum packet_state state = likely(skb_decrypt(skb, &PACKET_CB(skb)->keypair->receiving, have_simd)) ? PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
+ enum packet_state state = likely(skb_decrypt(skb, &PACKET_CB(skb)->keypair->receiving, simd_context)) ? PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
queue_enqueue_per_peer_napi(&PACKET_PEER(skb)->rx_queue, skb, state);
- have_simd = simd_relax(have_simd);
+ simd_context = simd_relax(simd_context);
}
- simd_put(have_simd);
+ simd_put(simd_context);
}
static void packet_consume_data(struct wireguard_device *wg, struct sk_buff *skb)
diff --git a/src/selftest/chacha20poly1305.h b/src/selftest/chacha20poly1305.h
index 51766d3..5835616 100644
--- a/src/selftest/chacha20poly1305.h
+++ b/src/selftest/chacha20poly1305.h
@@ -1427,7 +1427,7 @@ static const struct chacha20poly1305_testvec xchacha20poly1305_dec_vectors[] __i
static inline void chacha20poly1305_selftest_encrypt_bignonce(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u8 nonce[12], const u8 key[CHACHA20POLY1305_KEYLEN])
{
- bool have_simd = simd_get();
+ simd_context_t simd_context = simd_get();
struct poly1305_ctx poly1305_state;
struct chacha20_ctx chacha20_state;
union {
@@ -1439,18 +1439,18 @@ static inline void chacha20poly1305_selftest_encrypt_bignonce(u8 *dst, const u8
chacha20_state.counter[1] = le32_to_cpu(*(__le32 *)(nonce + 0));
chacha20_state.counter[2] = le32_to_cpu(*(__le32 *)(nonce + 4));
chacha20_state.counter[3] = le32_to_cpu(*(__le32 *)(nonce + 8));
- chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd);
- poly1305_init(&poly1305_state, b.block0, have_simd);
- poly1305_update(&poly1305_state, ad, ad_len, have_simd);
- poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd);
- chacha20(&chacha20_state, dst, src, src_len, have_simd);
- poly1305_update(&poly1305_state, dst, src_len, have_simd);
- poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd);
+ chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context);
+ poly1305_init(&poly1305_state, b.block0, simd_context);
+ poly1305_update(&poly1305_state, ad, ad_len, simd_context);
+ poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context);
+ chacha20(&chacha20_state, dst, src, src_len, simd_context);
+ poly1305_update(&poly1305_state, dst, src_len, simd_context);
+ poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context);
b.lens[0] = cpu_to_le64(ad_len);
b.lens[1] = cpu_to_le64(src_len);
- poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd);
- poly1305_finish(&poly1305_state, dst + src_len, have_simd);
- simd_put(have_simd);
+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context);
+ poly1305_finish(&poly1305_state, dst + src_len, simd_context);
+ simd_put(simd_context);
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
memzero_explicit(&b, sizeof(b));
}
@@ -1478,7 +1478,7 @@ bool __init chacha20poly1305_selftest(void)
{
size_t i;
u8 computed_result[MAXIMUM_TEST_BUFFER_LEN], *heap_src, *heap_dst;
- bool success = true, ret, have_simd;
+ bool success = true, ret;
+ simd_context_t simd_context;
struct scatterlist sg_src, sg_dst;
heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
@@ -1498,7 +1498,7 @@ bool __init chacha20poly1305_selftest(void)
success = false;
}
}
- have_simd = simd_get();
+ simd_context = simd_get();
for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {
if (chacha20poly1305_enc_vectors[i].nlen != 8)
continue;
@@ -1506,13 +1506,13 @@ bool __init chacha20poly1305_selftest(void)
memcpy(heap_src, chacha20poly1305_enc_vectors[i].input, chacha20poly1305_enc_vectors[i].ilen);
sg_init_one(&sg_src, heap_src, chacha20poly1305_enc_vectors[i].ilen);
sg_init_one(&sg_dst, heap_dst, chacha20poly1305_enc_vectors[i].ilen + POLY1305_MAC_SIZE);
- ret = chacha20poly1305_encrypt_sg(&sg_dst, &sg_src, chacha20poly1305_enc_vectors[i].ilen, chacha20poly1305_enc_vectors[i].assoc, chacha20poly1305_enc_vectors[i].alen, le64_to_cpup((__force __le64 *)chacha20poly1305_enc_vectors[i].nonce), chacha20poly1305_enc_vectors[i].key, have_simd);
+ ret = chacha20poly1305_encrypt_sg(&sg_dst, &sg_src, chacha20poly1305_enc_vectors[i].ilen, chacha20poly1305_enc_vectors[i].assoc, chacha20poly1305_enc_vectors[i].alen, le64_to_cpup((__force __le64 *)chacha20poly1305_enc_vectors[i].nonce), chacha20poly1305_enc_vectors[i].key, simd_context);
if (!ret || memcmp(heap_dst, chacha20poly1305_enc_vectors[i].result, chacha20poly1305_enc_vectors[i].ilen + POLY1305_MAC_SIZE)) {
pr_info("chacha20poly1305 sg encryption self-test %zu: FAIL\n", i + 1);
success = false;
}
}
- simd_put(have_simd);
+ simd_put(simd_context);
for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
memset(computed_result, 0, sizeof(computed_result));
ret = chacha20poly1305_decrypt(computed_result, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, le64_to_cpu(*(__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key);
@@ -1521,19 +1521,19 @@ bool __init chacha20poly1305_selftest(void)
success = false;
}
}
- have_simd = simd_get();
+ simd_context = simd_get();
for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
memset(heap_dst, 0, MAXIMUM_TEST_BUFFER_LEN);
memcpy(heap_src, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen);
sg_init_one(&sg_src, heap_src, chacha20poly1305_dec_vectors[i].ilen);
sg_init_one(&sg_dst, heap_dst, chacha20poly1305_dec_vectors[i].ilen - POLY1305_MAC_SIZE);
- ret = chacha20poly1305_decrypt_sg(&sg_dst, &sg_src, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, le64_to_cpup((__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key, have_simd);
+ ret = chacha20poly1305_decrypt_sg(&sg_dst, &sg_src, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, le64_to_cpup((__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key, simd_context);
if (!decryption_success(ret, chacha20poly1305_dec_vectors[i].failure, memcmp(heap_dst, chacha20poly1305_dec_vectors[i].result, chacha20poly1305_dec_vectors[i].ilen - POLY1305_MAC_SIZE))) {
pr_info("chacha20poly1305 sg decryption self-test %zu: FAIL\n", i + 1);
success = false;
}
}
- simd_put(have_simd);
+ simd_put(simd_context);
for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) {
memset(computed_result, 0, sizeof(computed_result));
xchacha20poly1305_encrypt(computed_result, xchacha20poly1305_enc_vectors[i].input, xchacha20poly1305_enc_vectors[i].ilen, xchacha20poly1305_enc_vectors[i].assoc, xchacha20poly1305_enc_vectors[i].alen, xchacha20poly1305_enc_vectors[i].nonce, xchacha20poly1305_enc_vectors[i].key);
diff --git a/src/selftest/poly1305.h b/src/selftest/poly1305.h
index d1b7227..0ff2301 100644
--- a/src/selftest/poly1305.h
+++ b/src/selftest/poly1305.h
@@ -6,8 +6,6 @@
#ifdef DEBUG
-#include "../crypto/simd.h"
-
struct poly1305_testdata {
size_t size;
const u8 data[1024];
@@ -1489,7 +1487,7 @@ static const struct poly1305_testvec poly1305_testvecs[] = {
bool __init poly1305_selftest(void)
{
- bool have_simd = simd_get();
+ simd_context_t simd_context = simd_get();
bool success = true;
size_t i;
@@ -1509,9 +1507,9 @@ bool __init poly1305_selftest(void)
memset(out, 0, sizeof(out));
memset(&poly1305, 0, sizeof(poly1305));
- poly1305_init(&poly1305, key, have_simd);
- poly1305_update(&poly1305, in, inlen, have_simd);
- poly1305_finish(&poly1305, out, have_simd);
+ poly1305_init(&poly1305, key, simd_context);
+ poly1305_update(&poly1305, in, inlen, simd_context);
+ poly1305_finish(&poly1305, out, simd_context);
if (memcmp(out, expected, expectedlen)) {
pr_info("poly1305 self-test %zu: FAIL\n", i + 1);
success = false;
@@ -1520,10 +1518,10 @@ bool __init poly1305_selftest(void)
if (inlen > 16) {
memset(out, 0, sizeof(out));
memset(&poly1305, 0, sizeof(poly1305));
- poly1305_init(&poly1305, key, have_simd);
- poly1305_update(&poly1305, in, 1, have_simd);
- poly1305_update(&poly1305, in + 1, inlen - 1, have_simd);
- poly1305_finish(&poly1305, out, have_simd);
+ poly1305_init(&poly1305, key, simd_context);
+ poly1305_update(&poly1305, in, 1, simd_context);
+ poly1305_update(&poly1305, in + 1, inlen - 1, simd_context);
+ poly1305_finish(&poly1305, out, simd_context);
if (memcmp(out, expected, expectedlen)) {
pr_info("poly1305 self-test %zu/1+(N-1): FAIL\n", i + 1);
success = false;
@@ -1535,10 +1533,10 @@ bool __init poly1305_selftest(void)
memset(out, 0, sizeof(out));
memset(&poly1305, 0, sizeof(poly1305));
- poly1305_init(&poly1305, key, have_simd);
- poly1305_update(&poly1305, in, half, have_simd);
- poly1305_update(&poly1305, in + half, inlen - half, have_simd);
- poly1305_finish(&poly1305, out, have_simd);
+ poly1305_init(&poly1305, key, simd_context);
+ poly1305_update(&poly1305, in, half, simd_context);
+ poly1305_update(&poly1305, in + half, inlen - half, simd_context);
+ poly1305_finish(&poly1305, out, simd_context);
if (memcmp(out, expected, expectedlen)) {
pr_info("poly1305 self-test %zu/2: FAIL\n", i + 1);
success = false;
@@ -1547,10 +1545,10 @@ bool __init poly1305_selftest(void)
for (half = 16; half < inlen; half += 16) {
memset(out, 0, sizeof(out));
memset(&poly1305, 0, sizeof(poly1305));
- poly1305_init(&poly1305, key, have_simd);
- poly1305_update(&poly1305, in, half, have_simd);
- poly1305_update(&poly1305, in + half, inlen - half, have_simd);
- poly1305_finish(&poly1305, out, have_simd);
+ poly1305_init(&poly1305, key, simd_context);
+ poly1305_update(&poly1305, in, half, simd_context);
+ poly1305_update(&poly1305, in + half, inlen - half, simd_context);
+ poly1305_finish(&poly1305, out, simd_context);
if (memcmp(out, expected, expectedlen)) {
pr_info("poly1305 self-test %zu/%zu+%zu: FAIL\n", i + 1, half, inlen - half);
success = false;
@@ -1558,7 +1556,7 @@ bool __init poly1305_selftest(void)
}
}
}
- simd_put(have_simd);
+ simd_put(simd_context);
if (success)
pr_info("poly1305 self-tests: pass\n");
diff --git a/src/send.c b/src/send.c
index 3fc2a17..3af7ef3 100644
--- a/src/send.c
+++ b/src/send.c
@@ -126,7 +126,7 @@ static inline unsigned int skb_padding(struct sk_buff *skb)
return padded_size - last_unit;
}
-static inline bool skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypair, bool have_simd)
+static inline bool skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypair, simd_context_t simd_context)
{
struct scatterlist sg[MAX_SKB_FRAGS * 2 + 1];
struct message_data *header;
@@ -167,7 +167,7 @@ static inline bool skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypai
sg_init_table(sg, num_frags);
if (skb_to_sgvec(skb, sg, sizeof(struct message_data), noise_encrypted_len(plaintext_len)) <= 0)
return false;
- return chacha20poly1305_encrypt_sg(sg, sg, plaintext_len, NULL, 0, PACKET_CB(skb)->nonce, keypair->sending.key, have_simd);
+ return chacha20poly1305_encrypt_sg(sg, sg, plaintext_len, NULL, 0, PACKET_CB(skb)->nonce, keypair->sending.key, simd_context);
}
void packet_send_keepalive(struct wireguard_peer *peer)
@@ -243,13 +243,13 @@ void packet_encrypt_worker(struct work_struct *work)
{
struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr;
struct sk_buff *first, *skb, *next;
- bool have_simd = simd_get();
+ simd_context_t simd_context = simd_get();
while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {
enum packet_state state = PACKET_STATE_CRYPTED;
skb_walk_null_queue_safe(first, skb, next) {
- if (likely(skb_encrypt(skb, PACKET_CB(first)->keypair, have_simd)))
+ if (likely(skb_encrypt(skb, PACKET_CB(first)->keypair, simd_context)))
skb_reset(skb);
else {
state = PACKET_STATE_DEAD;
@@ -258,9 +258,9 @@ void packet_encrypt_worker(struct work_struct *work)
}
queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, state);
- have_simd = simd_relax(have_simd);
+ simd_context = simd_relax(simd_context);
}
- simd_put(have_simd);
+ simd_put(simd_context);
}
static void packet_create_data(struct sk_buff *first)