summary refs log tree commit diff homepage
path: root/src
diff options
context:
space:
mode:
author Jason A. Donenfeld <Jason@zx2c4.com> 2018-06-16 04:52:35 +0200
committer Jason A. Donenfeld <Jason@zx2c4.com> 2018-06-17 19:36:37 +0200
commit 3f62999e8e27a7f315ef9346f34885d76334575a (patch)
tree 72d1997606568842bb14603238ffe17b4aaadf02 /src
parent 8186537ff6272a5533e4dcf4b1b428d9328bed44 (diff)
simd: encapsulate fpu amortization into nice functions
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'src')
-rw-r--r-- src/crypto/chacha20poly1305.c 17
-rw-r--r-- src/crypto/chacha20poly1305.h 39
-rw-r--r-- src/crypto/simd.h 57
-rw-r--r-- src/receive.c 13
-rw-r--r-- src/selftest/chacha20poly1305.h 12
-rw-r--r-- src/selftest/poly1305.h 5
-rw-r--r-- src/send.c 11
7 files changed, 83 insertions, 71 deletions
diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c
index ccc6e1c..df1c5aa 100644
--- a/src/crypto/chacha20poly1305.c
+++ b/src/crypto/chacha20poly1305.c
@@ -6,6 +6,7 @@
#include "chacha20poly1305.h"
#include "chacha20.h"
#include "poly1305.h"
+#include "simd.h"
#include <linux/kernel.h>
#include <crypto/scatterwalk.h>
@@ -65,9 +66,9 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
{
bool have_simd;
- have_simd = chacha20poly1305_init_simd();
+ have_simd = simd_get();
__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd);
- chacha20poly1305_deinit_simd(have_simd);
+ simd_put(have_simd);
}
bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
@@ -176,9 +177,9 @@ bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
{
bool have_simd, ret;
- have_simd = chacha20poly1305_init_simd();
+ have_simd = simd_get();
ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd);
- chacha20poly1305_deinit_simd(have_simd);
+ simd_put(have_simd);
return ret;
}
@@ -253,13 +254,13 @@ void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 nonce[XCHACHA20POLY1305_NONCELEN],
const u8 key[CHACHA20POLY1305_KEYLEN])
{
- bool have_simd = chacha20poly1305_init_simd();
+ bool have_simd = simd_get();
u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
hchacha20(derived_key, nonce, key, have_simd);
__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, have_simd);
memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN);
- chacha20poly1305_deinit_simd(have_simd);
+ simd_put(have_simd);
}
bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
@@ -267,13 +268,13 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 nonce[XCHACHA20POLY1305_NONCELEN],
const u8 key[CHACHA20POLY1305_KEYLEN])
{
- bool ret, have_simd = chacha20poly1305_init_simd();
+ bool ret, have_simd = simd_get();
u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
hchacha20(derived_key, nonce, key, have_simd);
ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, have_simd);
memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN);
- chacha20poly1305_deinit_simd(have_simd);
+ simd_put(have_simd);
return ret;
}
diff --git a/src/crypto/chacha20poly1305.h b/src/crypto/chacha20poly1305.h
index 1b122ac..43b0a17 100644
--- a/src/crypto/chacha20poly1305.h
+++ b/src/crypto/chacha20poly1305.h
@@ -44,45 +44,6 @@ bool __must_check xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t
const u8 nonce[XCHACHA20POLY1305_NONCELEN],
const u8 key[CHACHA20POLY1305_KEYLEN]);
-#if defined(CONFIG_X86_64)
-#include <linux/version.h>
-#include <asm/fpu/api.h>
-#include <asm/simd.h>
-#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
-#include <asm/neon.h>
-#include <asm/simd.h>
-#endif
-
-static inline bool chacha20poly1305_init_simd(void)
-{
- bool have_simd = false;
-#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) && !defined(CONFIG_PREEMPT_RT_BASE)
- have_simd = irq_fpu_usable();
- if (have_simd)
- kernel_fpu_begin();
-#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !defined(CONFIG_PREEMPT_RT_BASE)
-#if defined(CONFIG_ARM64)
- have_simd = true; /* ARM64 supports NEON in any context. */
-#elif defined(CONFIG_ARM)
- have_simd = may_use_simd(); /* ARM doesn't support NEON in interrupt context. */
-#endif
- if (have_simd)
- kernel_neon_begin();
-#endif
- return have_simd;
-}
-
-static inline void chacha20poly1305_deinit_simd(bool was_on)
-{
-#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
- if (was_on)
- kernel_fpu_end();
-#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
- if (was_on)
- kernel_neon_end();
-#endif
-}
-
#ifdef DEBUG
bool chacha20poly1305_selftest(void);
#endif
diff --git a/src/crypto/simd.h b/src/crypto/simd.h
new file mode 100644
index 0000000..21e3c55
--- /dev/null
+++ b/src/crypto/simd.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_SIMD_H
+#define _WG_SIMD_H
+
+#if defined(CONFIG_X86_64)
+#include <linux/version.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
+#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
+#include <asm/neon.h>
+#include <asm/simd.h>
+#endif
+
+static inline bool simd_get(void)
+{
+ bool have_simd = false;
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) && !defined(CONFIG_PREEMPT_RT_BASE)
+ have_simd = irq_fpu_usable();
+ if (have_simd)
+ kernel_fpu_begin();
+#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !defined(CONFIG_PREEMPT_RT_BASE)
+#if defined(CONFIG_ARM64)
+ have_simd = true; /* ARM64 supports NEON in any context. */
+#elif defined(CONFIG_ARM)
+ have_simd = may_use_simd(); /* ARM doesn't support NEON in interrupt context. */
+#endif
+ if (have_simd)
+ kernel_neon_begin();
+#endif
+ return have_simd;
+}
+
+static inline void simd_put(bool was_on)
+{
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) && !defined(CONFIG_PREEMPT_RT_BASE)
+ if (was_on)
+ kernel_fpu_end();
+#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !defined(CONFIG_PREEMPT_RT_BASE)
+ if (was_on)
+ kernel_neon_end();
+#endif
+}
+
+static inline bool simd_relax(bool was_on)
+{
+ if (was_on && need_resched()) {
+ simd_put(true);
+ return simd_get();
+ }
+ return was_on;
+}
+
+#endif /* _WG_SIMD_H */
diff --git a/src/receive.c b/src/receive.c
index f33941b..c5062f8 100644
--- a/src/receive.c
+++ b/src/receive.c
@@ -10,6 +10,7 @@
#include "messages.h"
#include "cookie.h"
#include "socket.h"
+#include "crypto/simd.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
@@ -418,21 +419,15 @@ void packet_decrypt_worker(struct work_struct *work)
{
struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr;
struct sk_buff *skb;
- bool have_simd = chacha20poly1305_init_simd();
+ bool have_simd = simd_get();
while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
enum packet_state state = likely(skb_decrypt(skb, &PACKET_CB(skb)->keypair->receiving, have_simd)) ? PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
-
queue_enqueue_per_peer(&PACKET_PEER(skb)->rx_queue, skb, state);
-
- /* Don't totally kill scheduling latency by keeping preemption disabled forever. */
- if (have_simd && need_resched()) {
- chacha20poly1305_deinit_simd(have_simd);
- have_simd = chacha20poly1305_init_simd();
- }
+ have_simd = simd_relax(have_simd);
}
- chacha20poly1305_deinit_simd(have_simd);
+ simd_put(have_simd);
}
static void packet_consume_data(struct wireguard_device *wg, struct sk_buff *skb)
diff --git a/src/selftest/chacha20poly1305.h b/src/selftest/chacha20poly1305.h
index e9070ba..9baca0b 100644
--- a/src/selftest/chacha20poly1305.h
+++ b/src/selftest/chacha20poly1305.h
@@ -1286,7 +1286,7 @@ static const struct chacha20poly1305_testvec xchacha20poly1305_dec_vectors[] __i
static inline void chacha20poly1305_selftest_encrypt_bignonce(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u8 nonce[12], const u8 key[CHACHA20POLY1305_KEYLEN])
{
- bool have_simd = chacha20poly1305_init_simd();
+ bool have_simd = simd_get();
struct poly1305_ctx poly1305_state;
struct chacha20_ctx chacha20_state;
union {
@@ -1309,7 +1309,7 @@ static inline void chacha20poly1305_selftest_encrypt_bignonce(u8 *dst, const u8
b.lens[1] = cpu_to_le64(src_len);
poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd);
poly1305_finish(&poly1305_state, dst + src_len, have_simd);
- chacha20poly1305_deinit_simd(have_simd);
+ simd_put(have_simd);
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
memzero_explicit(&b, sizeof(b));
}
@@ -1357,7 +1357,7 @@ bool __init chacha20poly1305_selftest(void)
success = false;
}
}
- have_simd = chacha20poly1305_init_simd();
+ have_simd = simd_get();
for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {
if (chacha20poly1305_enc_vectors[i].nlen != 8)
continue;
@@ -1371,7 +1371,7 @@ bool __init chacha20poly1305_selftest(void)
success = false;
}
}
- chacha20poly1305_deinit_simd(have_simd);
+ simd_put(have_simd);
for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
memset(computed_result, 0, sizeof(computed_result));
ret = chacha20poly1305_decrypt(computed_result, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, le64_to_cpu(*(__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key);
@@ -1380,7 +1380,7 @@ bool __init chacha20poly1305_selftest(void)
success = false;
}
}
- have_simd = chacha20poly1305_init_simd();
+ have_simd = simd_get();
for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
memset(heap_dst, 0, MAXIMUM_TEST_BUFFER_LEN);
memcpy(heap_src, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen);
@@ -1392,7 +1392,7 @@ bool __init chacha20poly1305_selftest(void)
success = false;
}
}
- chacha20poly1305_deinit_simd(have_simd);
+ simd_put(have_simd);
for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) {
memset(computed_result, 0, sizeof(computed_result));
xchacha20poly1305_encrypt(computed_result, xchacha20poly1305_enc_vectors[i].input, xchacha20poly1305_enc_vectors[i].ilen, xchacha20poly1305_enc_vectors[i].assoc, xchacha20poly1305_enc_vectors[i].alen, xchacha20poly1305_enc_vectors[i].nonce, xchacha20poly1305_enc_vectors[i].key);
diff --git a/src/selftest/poly1305.h b/src/selftest/poly1305.h
index 41acf7c..b3b74e1 100644
--- a/src/selftest/poly1305.h
+++ b/src/selftest/poly1305.h
@@ -7,6 +7,7 @@
#ifdef DEBUG
#include "../crypto/chacha20poly1305.h"
+#include "../crypto/simd.h"
struct poly1305_testdata {
size_t size;
@@ -1489,7 +1490,7 @@ static const struct poly1305_testvec poly1305_testvecs[] = {
bool __init poly1305_selftest(void)
{
- bool have_simd = chacha20poly1305_init_simd();
+ bool have_simd = simd_get();
bool success = true;
size_t i;
@@ -1558,7 +1559,7 @@ bool __init poly1305_selftest(void)
}
}
}
- chacha20poly1305_deinit_simd(have_simd);
+ simd_put(have_simd);
if (success)
pr_info("poly1305 self-tests: pass\n");
diff --git a/src/send.c b/src/send.c
index e97e439..d3e3d75 100644
--- a/src/send.c
+++ b/src/send.c
@@ -10,6 +10,7 @@
#include "socket.h"
#include "messages.h"
#include "cookie.h"
+#include "crypto/simd.h"
#include <linux/uio.h>
#include <linux/inetdevice.h>
@@ -242,7 +243,7 @@ void packet_encrypt_worker(struct work_struct *work)
{
struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr;
struct sk_buff *first, *skb, *next;
- bool have_simd = chacha20poly1305_init_simd();
+ bool have_simd = simd_get();
while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {
enum packet_state state = PACKET_STATE_CRYPTED;
@@ -257,13 +258,9 @@ void packet_encrypt_worker(struct work_struct *work)
}
queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, state);
- /* Don't totally kill scheduling latency by keeping preemption disabled forever. */
- if (have_simd && need_resched()) {
- chacha20poly1305_deinit_simd(have_simd);
- have_simd = chacha20poly1305_init_simd();
- }
+ have_simd = simd_relax(have_simd);
}
- chacha20poly1305_deinit_simd(have_simd);
+ simd_put(have_simd);
}
static void packet_create_data(struct sk_buff *first)