summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- src/Makefile 5
-rw-r--r-- src/crypto/blake2s.c 28
-rw-r--r-- src/crypto/blake2s.h 3
-rw-r--r-- src/crypto/chacha20poly1305.c 17
-rw-r--r-- src/crypto/curve25519.c 279
-rw-r--r-- src/device.c 12
-rw-r--r-- src/hashtables.c 9
-rw-r--r-- src/messages.h 2
-rw-r--r-- src/netlink.c 11
-rw-r--r-- src/noise.c 18
-rw-r--r-- src/peer.c 3
-rw-r--r-- src/queueing.h 3
-rw-r--r-- src/ratelimiter.c 6
-rw-r--r-- src/receive.c 10
-rw-r--r-- src/send.c 18
-rw-r--r-- src/socket.c 3
-rw-r--r-- src/timers.c 6
17 files changed, 269 insertions, 164 deletions
diff --git a/src/Makefile b/src/Makefile
index c4ccb55..86411fa 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -65,6 +65,9 @@ tools:
tools-debug:
@$(MAKE) -C tools V=1 DEBUG_TOOLS=y
+style:
+ $(KERNELDIR)/scripts/checkpatch.pl -f --max-line-length=4000 --codespell --color=always $(filter-out wireguard.mod.c,$(wildcard *.c)) $(wildcard *.h)
+
check: clean
scan-build --view --keep-going $(MAKE) module tools CONFIG_WIREGUARD_DEBUG=y C=2 CF="-D__CHECK_ENDIAN__"
@@ -76,4 +79,4 @@ cloc:
-include tests/debug.mk
-.PHONY: all module module-debug module-install tools install dkms-install clean core-cloc check version.h dkms.conf
+.PHONY: all module module-debug module-install tools install dkms-install clean core-cloc check style version.h dkms.conf
diff --git a/src/crypto/blake2s.c b/src/crypto/blake2s.c
index ab37a0c..91f154f 100644
--- a/src/crypto/blake2s.c
+++ b/src/crypto/blake2s.c
@@ -65,6 +65,7 @@ static inline void blake2s_init_param(struct blake2s_state *state, const blake2s
{
const __le32 *p;
int i;
+
memset(state, 0, sizeof(struct blake2s_state));
for (i = 0; i < 8; ++i)
state->h[i] = blake2s_iv[i];
@@ -112,12 +113,12 @@ void blake2s_init_key(struct blake2s_state *state, const size_t outlen, const vo
#include <asm/processor.h>
#include <asm/fpu/api.h>
#include <asm/simd.h>
-static bool blake2s_use_avx __read_mostly = false;
+static bool blake2s_use_avx __read_mostly;
void __init blake2s_fpu_init(void)
{
blake2s_use_avx = boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
}
-asmlinkage void blake2s_compress_avx(struct blake2s_state *state, const u8 * block, size_t nblocks, u32 inc);
+asmlinkage void blake2s_compress_avx(struct blake2s_state *state, const u8 *block, size_t nblocks, u32 inc);
#else
void __init blake2s_fpu_init(void) { }
#endif
@@ -160,7 +161,7 @@ static inline void blake2s_compress(struct blake2s_state *state, const u8 *block
v[14] = blake2s_iv[6] ^ state->f[0];
v[15] = blake2s_iv[7] ^ state->f[1];
-#define G(r,i,a,b,c,d) do { \
+#define G(r, i, a, b, c, d) do { \
a += b + m[blake2s_sigma[r][2 * i + 0]]; \
d = ror32(d ^ a, 16); \
c += d; \
@@ -169,18 +170,18 @@ static inline void blake2s_compress(struct blake2s_state *state, const u8 *block
d = ror32(d ^ a, 8); \
c += d; \
b = ror32(b ^ c, 7); \
-} while(0)
+} while (0)
#define ROUND(r) do { \
- G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
- G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
- G(r,2,v[ 2],v[ 6],v[10],v[14]); \
- G(r,3,v[ 3],v[ 7],v[11],v[15]); \
- G(r,4,v[ 0],v[ 5],v[10],v[15]); \
- G(r,5,v[ 1],v[ 6],v[11],v[12]); \
- G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
- G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
-} while(0)
+ G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
+ G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
+ G(r, 2, v[2], v[ 6], v[10], v[14]); \
+ G(r, 3, v[3], v[ 7], v[11], v[15]); \
+ G(r, 4, v[0], v[ 5], v[10], v[15]); \
+ G(r, 5, v[1], v[ 6], v[11], v[12]); \
+ G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
+ G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
+} while (0)
ROUND(0);
ROUND(1);
ROUND(2);
@@ -206,6 +207,7 @@ static inline void blake2s_compress(struct blake2s_state *state, const u8 *block
void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
{
const size_t fill = BLAKE2S_BLOCKBYTES - state->buflen;
+
if (unlikely(!inlen))
return;
if (inlen > fill) {
diff --git a/src/crypto/blake2s.h b/src/crypto/blake2s.h
index 99de9f9..9ed53ea 100644
--- a/src/crypto/blake2s.h
+++ b/src/crypto/blake2s.h
@@ -38,10 +38,12 @@ static inline void blake2s_final(struct blake2s_state *state, u8 *out, size_t ou
if (__builtin_constant_p(outlen) && !(outlen % sizeof(u32))) {
if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || IS_ALIGNED((unsigned long)out, __alignof__(u32))) {
__le32 *outwords = (__le32 *)out;
+
for (i = 0; i < outlen / sizeof(u32); ++i)
outwords[i] = cpu_to_le32(state->h[i]);
} else {
__le32 buffer[BLAKE2S_OUTBYTES];
+
for (i = 0; i < outlen / sizeof(u32); ++i)
buffer[i] = cpu_to_le32(state->h[i]);
memcpy(out, buffer, outlen);
@@ -50,6 +52,7 @@ static inline void blake2s_final(struct blake2s_state *state, u8 *out, size_t ou
} else {
u8 buffer[BLAKE2S_OUTBYTES] __aligned(__alignof__(u32));
__le32 *outwords = (__le32 *)buffer;
+
for (i = 0; i < 8; ++i)
outwords[i] = cpu_to_le32(state->h[i]);
memcpy(out, buffer, outlen);
diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c
index 3163009..2ce7cbb 100644
--- a/src/crypto/chacha20poly1305.c
+++ b/src/crypto/chacha20poly1305.c
@@ -28,9 +28,9 @@ asmlinkage void poly1305_asm_2block_sse2(u32 *h, const u8 *src, const u32 *r, un
#ifdef CONFIG_AS_AVX2
asmlinkage void poly1305_asm_4block_avx2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks, const u32 *u);
#endif
-static bool chacha20poly1305_use_avx2 __read_mostly = false;
-static bool chacha20poly1305_use_ssse3 __read_mostly = false;
-static bool chacha20poly1305_use_sse2 __read_mostly = false;
+static bool chacha20poly1305_use_avx2 __read_mostly;
+static bool chacha20poly1305_use_ssse3 __read_mostly;
+static bool chacha20poly1305_use_sse2 __read_mostly;
void chacha20poly1305_fpu_init(void)
{
chacha20poly1305_use_sse2 = boot_cpu_has(X86_FEATURE_XMM2);
@@ -42,7 +42,7 @@ void chacha20poly1305_fpu_init(void)
#include <asm/neon.h>
asmlinkage void chacha20_asm_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_asm_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-static bool chacha20poly1305_use_neon __read_mostly = false;
+static bool chacha20poly1305_use_neon __read_mostly;
void __init chacha20poly1305_fpu_init(void)
{
#if defined(CONFIG_ARM64)
@@ -458,7 +458,8 @@ static void poly1305_simd_mult(u32 *a, const u32 *b)
memset(m, 0, sizeof(m));
/* The poly1305 block function adds a hi-bit to the accumulator which
- * we don't need for key multiplication; compensate for it. */
+ * we don't need for key multiplication; compensate for it.
+ */
a[4] -= 1U << 24;
poly1305_asm_block_sse2(a, m, b, 1);
}
@@ -663,6 +664,7 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN])
{
bool have_simd;
+
have_simd = chacha20poly1305_init_simd();
__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd);
chacha20poly1305_deinit_simd(have_simd);
@@ -696,6 +698,7 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *sr
ret = blkcipher_walk_virt_block(&chacha20_desc, &walk, CHACHA20_BLOCK_SIZE);
while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE);
+
chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd);
poly1305_update(&poly1305_state, walk.dst.virt.addr, chunk_len, have_simd);
ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE);
@@ -780,6 +783,7 @@ bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN])
{
bool have_simd, ret;
+
have_simd = chacha20poly1305_init_simd();
ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd);
chacha20poly1305_deinit_simd(have_simd);
@@ -821,6 +825,7 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr
ret = blkcipher_walk_virt_block(&chacha20_desc, &walk, CHACHA20_BLOCK_SIZE);
while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE);
+
poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len, have_simd);
chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd);
ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE);
@@ -863,6 +868,7 @@ void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
{
bool have_simd = chacha20poly1305_init_simd();
u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
+
hchacha20(derived_key, nonce, key, have_simd);
__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpuvp(nonce + 16), derived_key, have_simd);
memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN);
@@ -876,6 +882,7 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
{
bool ret, have_simd = chacha20poly1305_init_simd();
u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
+
hchacha20(derived_key, nonce, key, have_simd);
ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpuvp(nonce + 16), derived_key, have_simd);
memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN);
diff --git a/src/crypto/curve25519.c b/src/crypto/curve25519.c
index 892da78..c594a56 100644
--- a/src/crypto/curve25519.c
+++ b/src/crypto/curve25519.c
@@ -30,7 +30,7 @@ static const u8 null_point[CURVE25519_POINT_SIZE] = { 0 };
#include <asm/processor.h>
#include <asm/fpu/api.h>
#include <asm/simd.h>
-static bool curve25519_use_avx __read_mostly = false;
+static bool curve25519_use_avx __read_mostly;
void curve25519_fpu_init(void)
{
curve25519_use_avx = boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
@@ -129,6 +129,7 @@ static void curve25519_sandy2x(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secr
u8 e[32];
fe var[3];
fe51 x_51, z_51;
+
memcpy(e, secret, 32);
normalize_secret(e);
#define x1 var[0]
@@ -164,6 +165,7 @@ static void curve25519_sandy2x_base(u8 pub[CURVE25519_POINT_SIZE], const u8 secr
u8 e[32];
fe var[3];
fe51 x_51, z_51;
+
memcpy(e, secret, 32);
normalize_secret(e);
curve25519_sandy2x_ladder_base(var, e);
@@ -195,7 +197,7 @@ static void curve25519_sandy2x_base(u8 pub[CURVE25519_POINT_SIZE], const u8 secr
#include <asm/neon.h>
#include <asm/simd.h>
asmlinkage void curve25519_asm_neon(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE]);
-static bool curve25519_use_neon __read_mostly = false;
+static bool curve25519_use_neon __read_mostly;
void __init curve25519_fpu_init(void)
{
curve25519_use_neon = elf_hwcap & HWCAP_NEON;
@@ -272,7 +274,7 @@ static __always_inline void fscalar_product(felem output, const felem in, const
static __always_inline void fmul(felem output, const felem in2, const felem in)
{
u128 t[5];
- limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
+ limb r0, r1, r2, r3, r4, s0, s1, s2, s3, s4, c;
r0 = in[0];
r1 = in[1];
@@ -321,8 +323,8 @@ static __always_inline void fmul(felem output, const felem in2, const felem in)
static __always_inline void fsquare_times(felem output, const felem in, limb count)
{
u128 t[5];
- limb r0,r1,r2,r3,r4,c;
- limb d0,d1,d2,d4,d419;
+ limb r0, r1, r2, r3, r4, c;
+ limb d0, d1, d2, d4, d419;
r0 = in[0];
r1 = in[1];
@@ -351,7 +353,7 @@ static __always_inline void fsquare_times(felem output, const felem in, limb cou
r0 += c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffffUL;
r1 += c; c = r1 >> 51; r1 = r1 & 0x7ffffffffffffUL;
r2 += c;
- } while(--count);
+ } while (--count);
output[0] = r0;
output[1] = r1;
@@ -452,6 +454,7 @@ static void fmonty(limb *x2, limb *z2, /* output 2Q */
limb *x3, limb *z3, /* output Q + Q' */
limb *x, limb *z, /* input Q */
limb *xprime, limb *zprime, /* input Q' */
+
const limb *qmqp /* input Q - Q' */)
{
limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5], zzzprime[5];
@@ -489,11 +492,12 @@ static void fmonty(limb *x2, limb *z2, /* output 2Q */
*/
static void swap_conditional(limb a[5], limb b[5], limb iswap)
{
- unsigned i;
+ unsigned int i;
const limb swap = -iswap;
for (i = 0; i < 5; ++i) {
const limb x = swap & (a[i] ^ b[i]);
+
a[i] ^= x;
b[i] ^= x;
}
@@ -512,12 +516,13 @@ static void cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q)
limb e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1};
limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
- unsigned i, j;
+ unsigned int i, j;
memcpy(nqpqx, q, sizeof(limb) * 5);
for (i = 0; i < 32; ++i) {
u8 byte = n[31 - i];
+
for (j = 0; j < 8; ++j) {
const limb bit = byte >> 7;
@@ -554,7 +559,7 @@ static void cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q)
static void crecip(felem out, const felem z)
{
- felem a,t0,b,c;
+ felem a, t0, b, c;
/* 2 */ fsquare_times(a, z, 1); // a = 2
/* 8 */ fsquare_times(t0, a, 2);
@@ -633,12 +638,14 @@ typedef s64 limb;
* significant first. The value of the field element is:
* x[0] + 2^26·x[1] + 2^51·x[2] + 2^77·x[3] + ...
*
- * i.e. the limbs are 26, 25, 26, 25, ... bits wide. */
+ * i.e. the limbs are 26, 25, 26, 25, ... bits wide.
+ */
/* Sum two numbers: output += in */
static void fsum(limb *output, const limb *in)
{
- unsigned i;
+ unsigned int i;
+
for (i = 0; i < 10; i += 2) {
output[0 + i] = output[0 + i] + in[0 + i];
output[1 + i] = output[1 + i] + in[1 + i];
@@ -646,10 +653,12 @@ static void fsum(limb *output, const limb *in)
}
/* Find the difference of two numbers: output = in - output
- * (note the order of the arguments!). */
+ * (note the order of the arguments!).
+ */
static void fdifference(limb *output, const limb *in)
{
- unsigned i;
+ unsigned int i;
+
for (i = 0; i < 10; ++i)
output[i] = in[i] - output[i];
}
@@ -657,7 +666,8 @@ static void fdifference(limb *output, const limb *in)
/* Multiply a number by a scalar: output = in * scalar */
static void fscalar_product(limb *output, const limb *in, const limb scalar)
{
- unsigned i;
+ unsigned int i;
+
for (i = 0; i < 10; ++i)
output[i] = in[i] * scalar;
}
@@ -667,7 +677,8 @@ static void fscalar_product(limb *output, const limb *in, const limb scalar)
* output must be distinct from both inputs. The inputs are reduced coefficient
* form, the output is not.
*
- * output[x] <= 14 * the largest product of the input limbs. */
+ * output[x] <= 14 * the largest product of the input limbs.
+ */
static void fproduct(limb *output, const limb *in2, const limb *in)
{
output[0] = ((limb) ((s32) in2[0])) * ((s32) in[0]);
@@ -775,13 +786,15 @@ static void fproduct(limb *output, const limb *in2, const limb *in)
/* Reduce a long form to a short form by taking the input mod 2^255 - 19.
*
* On entry: |output[i]| < 14*2^54
- * On exit: |output[0..8]| < 280*2^54 */
+ * On exit: |output[0..8]| < 280*2^54
+ */
static void freduce_degree(limb *output)
{
/* Each of these shifts and adds ends up multiplying the value by 19.
*
* For output[0..8], the absolute entry value is < 14*2^54 and we add, at
- * most, 19*14*2^54 thus, on exit, |output[0..8]| < 280*2^54. */
+ * most, 19*14*2^54 thus, on exit, |output[0..8]| < 280*2^54.
+ */
output[8] += output[18] << 4;
output[8] += output[18] << 1;
output[8] += output[18];
@@ -817,7 +830,8 @@ static void freduce_degree(limb *output)
/* return v / 2^26, using only shifts and adds.
*
- * On entry: v can take any value. */
+ * On entry: v can take any value.
+ */
static inline limb div_by_2_26(const limb v)
{
/* High word of v; no shift needed. */
@@ -832,7 +846,8 @@ static inline limb div_by_2_26(const limb v)
/* return v / (2^25), using only shifts and adds.
*
- * On entry: v can take any value. */
+ * On entry: v can take any value.
+ */
static inline limb div_by_2_25(const limb v)
{
/* High word of v; no shift needed. */
@@ -847,10 +862,11 @@ static inline limb div_by_2_25(const limb v)
/* Reduce all coefficients of the short form input so that |x| < 2^26.
*
- * On entry: |output[i]| < 280*2^54 */
+ * On entry: |output[i]| < 280*2^54
+ */
static void freduce_coefficients(limb *output)
{
- unsigned i;
+ unsigned int i;
output[10] = 0;
@@ -859,7 +875,8 @@ static void freduce_coefficients(limb *output)
/* The entry condition (that |output[i]| < 280*2^54) means that over is, at
* most, 280*2^28 in the first iteration of this loop. This is added to the
* next limb and we can approximate the resulting bound of that limb by
- * 281*2^54. */
+ * 281*2^54.
+ */
output[i] -= over << 26;
output[i+1] += over;
@@ -868,7 +885,8 @@ static void freduce_coefficients(limb *output)
* be approximated as 281*2^54.
*
* For subsequent iterations of the loop, 281*2^54 remains a conservative
- * bound and no overflow occurs. */
+ * bound and no overflow occurs.
+ */
over = div_by_2_25(output[i+1]);
output[i+1] -= over << 25;
output[i+2] += over;
@@ -881,15 +899,18 @@ static void freduce_coefficients(limb *output)
output[10] = 0;
/* Now output[1..9] are reduced, and |output[0]| < 2^26 + 19*281*2^29
- * So |over| will be no more than 2^16. */
+ * So |over| will be no more than 2^16.
+ */
{
limb over = div_by_2_26(output[0]);
+
output[0] -= over << 26;
output[1] += over;
}
/* Now output[0,2..9] are reduced, and |output[1]| < 2^25 + 2^16 < 2^26. The
- * bound on |output[1]| is sufficient to meet our needs. */
+ * bound on |output[1]| is sufficient to meet our needs.
+ */
}
/* A helpful wrapper around fproduct: output = in * in2.
@@ -897,10 +918,12 @@ static void freduce_coefficients(limb *output)
* On entry: |in[i]| < 2^27 and |in2[i]| < 2^27.
*
* output must be distinct from both inputs. The output is reduced degree
- * (indeed, one need only provide storage for 10 limbs) and |output[i]| < 2^26. */
+ * (indeed, one need only provide storage for 10 limbs) and |output[i]| < 2^26.
+ */
static void fmul(limb *output, const limb *in, const limb *in2)
{
limb t[19];
+
fproduct(t, in, in2);
/* |t[i]| < 14*2^54 */
freduce_degree(t);
@@ -914,7 +937,8 @@ static void fmul(limb *output, const limb *in, const limb *in2)
* output must be distinct from the input. The inputs are reduced coefficient
* form, the output is not.
*
- * output[x] <= 14 * the largest product of the input limbs. */
+ * output[x] <= 14 * the largest product of the input limbs.
+ */
static void fsquare_inner(limb *output, const limb *in)
{
output[0] = ((limb) ((s32) in[0])) * ((s32) in[0]);
@@ -980,14 +1004,17 @@ static void fsquare_inner(limb *output, const limb *in)
* 2^27.
*
* On exit: The |output| argument is in reduced coefficients form (indeed, one
- * need only provide storage for 10 limbs) and |out[i]| < 2^26. */
+ * need only provide storage for 10 limbs) and |out[i]| < 2^26.
+ */
static void fsquare(limb *output, const limb *in)
{
limb t[19];
+
fsquare_inner(t, in);
/* |t[i]| < 14*2^54 because the largest product of two limbs will be <
* 2^(27+27) and fsquare_inner adds together, at most, 14 of those
- * products. */
+ * products.
+ */
freduce_degree(t);
freduce_coefficients(t);
/* |t[i]| < 2^26 */
@@ -997,7 +1024,7 @@ static void fsquare(limb *output, const limb *in)
/* Take a little-endian, 32-byte number and expand it into polynomial form */
static inline void fexpand(limb *output, const u8 *input)
{
-#define F(n,start,shift,mask) \
+#define F(n, start, shift, mask) \
output[n] = ((((limb) input[start + 0]) | \
((limb) input[start + 1]) << 8 | \
((limb) input[start + 2]) << 16 | \
@@ -1032,7 +1059,8 @@ static s32 s32_eq(s32 a, s32 b)
}
/* s32_gte returns 0xffffffff if a >= b and zero otherwise, where a and b are
- * both non-negative. */
+ * both non-negative.
+ */
static s32 s32_gte(s32 a, s32 b)
{
a -= b;
@@ -1043,7 +1071,8 @@ static s32 s32_gte(s32 a, s32 b)
/* Take a fully reduced polynomial form number and contract it into a
* little-endian, 32-byte array.
*
- * On entry: |input_limbs[i]| < 2^26 */
+ * On entry: |input_limbs[i]| < 2^26
+ */
static void fcontract(u8 *output, limb *input_limbs)
{
int i;
@@ -1060,31 +1089,37 @@ static void fcontract(u8 *output, limb *input_limbs)
for (i = 0; i < 9; ++i) {
if ((i & 1) == 1) {
/* This calculation is a time-invariant way to make input[i]
- * non-negative by borrowing from the next-larger limb. */
+ * non-negative by borrowing from the next-larger limb.
+ */
const s32 mask = input[i] >> 31;
const s32 carry = -((input[i] & mask) >> 25);
+
input[i] = input[i] + (carry << 25);
input[i+1] = input[i+1] - carry;
} else {
const s32 mask = input[i] >> 31;
const s32 carry = -((input[i] & mask) >> 26);
+
input[i] = input[i] + (carry << 26);
input[i+1] = input[i+1] - carry;
}
}
/* There's no greater limb for input[9] to borrow from, but we can multiply
- * by 19 and borrow from input[0], which is valid mod 2^255-19. */
+ * by 19 and borrow from input[0], which is valid mod 2^255-19.
+ */
{
const s32 mask = input[9] >> 31;
const s32 carry = -((input[9] & mask) >> 25);
+
input[9] = input[9] + (carry << 25);
input[0] = input[0] - (carry * 19);
}
/* After the first iteration, input[1..9] are non-negative and fit within
* 25 or 26 bits, depending on position. However, input[0] may be
- * negative. */
+ * negative.
+ */
}
/* The first borrow-propagation pass above ended with every limb
@@ -1100,20 +1135,24 @@ static void fcontract(u8 *output, limb *input_limbs)
{
const s32 mask = input[0] >> 31;
const s32 carry = -((input[0] & mask) >> 26);
+
input[0] = input[0] + (carry << 26);
input[1] = input[1] - carry;
}
/* All input[i] are now non-negative. However, there might be values between
- * 2^25 and 2^26 in a limb which is, nominally, 25 bits wide. */
+ * 2^25 and 2^26 in a limb which is, nominally, 25 bits wide.
+ */
for (j = 0; j < 2; j++) {
for (i = 0; i < 9; i++) {
if ((i & 1) == 1) {
const s32 carry = input[i] >> 25;
+
input[i] &= 0x1ffffff;
input[i+1] += carry;
} else {
const s32 carry = input[i] >> 26;
+
input[i] &= 0x3ffffff;
input[i+1] += carry;
}
@@ -1121,6 +1160,7 @@ static void fcontract(u8 *output, limb *input_limbs)
{
const s32 carry = input[9] >> 25;
+
input[9] &= 0x1ffffff;
input[0] += 19*carry;
}
@@ -1131,11 +1171,13 @@ static void fcontract(u8 *output, limb *input_limbs)
* < 2^26 + 2*19, because the carry was, at most, two.
*
* If the second pass carried from input[9] again then input[0] is < 2*19 and
- * the input[9] -> input[0] carry didn't push input[0] out of bounds. */
+ * the input[9] -> input[0] carry didn't push input[0] out of bounds.
+ */
/* It still remains the case that input might be between 2^255-19 and 2^255.
* In this case, input[1..9] must take their maximum value and input[0] must
- * be >= (2^255-19) & 0x3ffffff, which is 0x3ffffed. */
+ * be >= (2^255-19) & 0x3ffffff, which is 0x3ffffed.
+ */
mask = s32_gte(input[0], 0x3ffffed);
for (i = 1; i < 10; i++) {
if ((i & 1) == 1) {
@@ -1146,7 +1188,8 @@ static void fcontract(u8 *output, limb *input_limbs)
}
/* mask is 0xffffffff if input >= 2^255-19 and zero otherwise. Thus
- * this conditionally subtracts 2^255-19. */
+ * this conditionally subtracts 2^255-19.
+ */
input[0] -= mask & 0x3ffffed;
for (i = 1; i < 10; i++) {
@@ -1172,16 +1215,16 @@ static void fcontract(u8 *output, limb *input_limbs)
output[s+3] = (input[i] >> 24) & 0xff;
output[0] = 0;
output[16] = 0;
- F(0,0);
- F(1,3);
- F(2,6);
- F(3,9);
- F(4,12);
- F(5,16);
- F(6,19);
- F(7,22);
- F(8,25);
- F(9,28);
+ F(0, 0);
+ F(1, 3);
+ F(2, 6);
+ F(3, 9);
+ F(4, 12);
+ F(5, 16);
+ F(6, 19);
+ F(7, 22);
+ F(8, 25);
+ F(9, 28);
#undef F
}
@@ -1193,14 +1236,16 @@ static void fcontract(u8 *output, limb *input_limbs)
* wrong results. Also, the two limb arrays must be in reduced-coefficient,
* reduced-degree form: the values in a[10..19] or b[10..19] aren't swapped,
* and all values in a[0..9],b[0..9] must have magnitude less than
- * INT32_MAX. */
+ * INT32_MAX.
+ */
static void swap_conditional(limb a[19], limb b[19], limb iswap)
{
- unsigned i;
+ unsigned int i;
const s32 swap = (s32) -iswap;
for (i = 0; i < 10; ++i) {
- const s32 x = swap & ( ((s32)a[i]) ^ ((s32)b[i]) );
+ const s32 x = swap & (((s32)a[i]) ^ ((s32)b[i]));
+
a[i] = ((s32)a[i]) ^ x;
b[i] = ((s32)b[i]) ^ x;
}
@@ -1220,57 +1265,57 @@ static void crecip(limb *out, const limb *z)
limb t1[10];
int i;
- /* 2 */ fsquare(z2,z);
- /* 4 */ fsquare(t1,z2);
- /* 8 */ fsquare(t0,t1);
- /* 9 */ fmul(z9,t0,z);
- /* 11 */ fmul(z11,z9,z2);
- /* 22 */ fsquare(t0,z11);
- /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9);
-
- /* 2^6 - 2^1 */ fsquare(t0,z2_5_0);
- /* 2^7 - 2^2 */ fsquare(t1,t0);
- /* 2^8 - 2^3 */ fsquare(t0,t1);
- /* 2^9 - 2^4 */ fsquare(t1,t0);
- /* 2^10 - 2^5 */ fsquare(t0,t1);
- /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0);
-
- /* 2^11 - 2^1 */ fsquare(t0,z2_10_0);
- /* 2^12 - 2^2 */ fsquare(t1,t0);
- /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
- /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0);
-
- /* 2^21 - 2^1 */ fsquare(t0,z2_20_0);
- /* 2^22 - 2^2 */ fsquare(t1,t0);
- /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
- /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0);
-
- /* 2^41 - 2^1 */ fsquare(t1,t0);
- /* 2^42 - 2^2 */ fsquare(t0,t1);
- /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
- /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0);
-
- /* 2^51 - 2^1 */ fsquare(t0,z2_50_0);
- /* 2^52 - 2^2 */ fsquare(t1,t0);
- /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
- /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0);
-
- /* 2^101 - 2^1 */ fsquare(t1,z2_100_0);
- /* 2^102 - 2^2 */ fsquare(t0,t1);
- /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
- /* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0);
-
- /* 2^201 - 2^1 */ fsquare(t0,t1);
- /* 2^202 - 2^2 */ fsquare(t1,t0);
- /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
- /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0);
-
- /* 2^251 - 2^1 */ fsquare(t1,t0);
- /* 2^252 - 2^2 */ fsquare(t0,t1);
- /* 2^253 - 2^3 */ fsquare(t1,t0);
- /* 2^254 - 2^4 */ fsquare(t0,t1);
- /* 2^255 - 2^5 */ fsquare(t1,t0);
- /* 2^255 - 21 */ fmul(out,t1,z11);
+ /* 2 */ fsquare(z2, z);
+ /* 4 */ fsquare(t1, z2);
+ /* 8 */ fsquare(t0, t1);
+ /* 9 */ fmul(z9, t0, z);
+ /* 11 */ fmul(z11, z9, z2);
+ /* 22 */ fsquare(t0, z11);
+ /* 2^5 - 2^0 = 31 */ fmul(z2_5_0, t0, z9);
+
+ /* 2^6 - 2^1 */ fsquare(t0, z2_5_0);
+ /* 2^7 - 2^2 */ fsquare(t1, t0);
+ /* 2^8 - 2^3 */ fsquare(t0, t1);
+ /* 2^9 - 2^4 */ fsquare(t1, t0);
+ /* 2^10 - 2^5 */ fsquare(t0, t1);
+ /* 2^10 - 2^0 */ fmul(z2_10_0, t0, z2_5_0);
+
+ /* 2^11 - 2^1 */ fsquare(t0, z2_10_0);
+ /* 2^12 - 2^2 */ fsquare(t1, t0);
+ /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t0, t1); fsquare(t1, t0); }
+ /* 2^20 - 2^0 */ fmul(z2_20_0, t1, z2_10_0);
+
+ /* 2^21 - 2^1 */ fsquare(t0, z2_20_0);
+ /* 2^22 - 2^2 */ fsquare(t1, t0);
+ /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) { fsquare(t0, t1); fsquare(t1, t0); }
+ /* 2^40 - 2^0 */ fmul(t0, t1, z2_20_0);
+
+ /* 2^41 - 2^1 */ fsquare(t1, t0);
+ /* 2^42 - 2^2 */ fsquare(t0, t1);
+ /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t1, t0); fsquare(t0, t1); }
+ /* 2^50 - 2^0 */ fmul(z2_50_0, t0, z2_10_0);
+
+ /* 2^51 - 2^1 */ fsquare(t0, z2_50_0);
+ /* 2^52 - 2^2 */ fsquare(t1, t0);
+ /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0, t1); fsquare(t1, t0); }
+ /* 2^100 - 2^0 */ fmul(z2_100_0, t1, z2_50_0);
+
+ /* 2^101 - 2^1 */ fsquare(t1, z2_100_0);
+ /* 2^102 - 2^2 */ fsquare(t0, t1);
+ /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) { fsquare(t1, t0); fsquare(t0, t1); }
+ /* 2^200 - 2^0 */ fmul(t1, t0, z2_100_0);
+
+ /* 2^201 - 2^1 */ fsquare(t0, t1);
+ /* 2^202 - 2^2 */ fsquare(t1, t0);
+ /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0, t1); fsquare(t1, t0); }
+ /* 2^250 - 2^0 */ fmul(t0, t1, z2_50_0);
+
+ /* 2^251 - 2^1 */ fsquare(t1, t0);
+ /* 2^252 - 2^2 */ fsquare(t0, t1);
+ /* 2^253 - 2^3 */ fsquare(t1, t0);
+ /* 2^254 - 2^4 */ fsquare(t0, t1);
+ /* 2^255 - 2^5 */ fsquare(t1, t0);
+ /* 2^255 - 21 */ fmul(out, t1, z11);
}
@@ -1285,11 +1330,13 @@ static void crecip(limb *out, const limb *z)
* qmqp: short form, preserved
*
* On entry and exit, the absolute value of the limbs of all inputs and outputs
- * are < 2^26. */
+ * are < 2^26.
+ */
static void fmonty(limb *x2, limb *z2, /* output 2Q */
limb *x3, limb *z3, /* output Q + Q' */
limb *x, limb *z, /* input Q */
limb *xprime, limb *zprime, /* input Q' */
+
const limb *qmqp /* input Q - Q' */)
{
limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19],
@@ -1309,7 +1356,8 @@ static void fmonty(limb *x2, limb *z2, /* output 2Q */
fproduct(xxprime, xprime, z);
/* |xxprime[i]| < 14*2^54: the largest product of two limbs will be <
* 2^(27+27) and fproduct adds together, at most, 14 of those products.
- * (Approximating that to 2^58 doesn't work out.) */
+ * (Approximating that to 2^58 doesn't work out.)
+ */
fproduct(zzprime, x, zprime);
/* |zzprime[i]| < 14*2^54 */
freduce_degree(xxprime);
@@ -1366,7 +1414,8 @@ static void fmonty(limb *x2, limb *z2, /* output 2Q */
*
* resultx/resultz: the x coordinate of the resulting curve point (short form)
* n: a little endian, 32-byte number
- * q: a point of the curve (short form) */
+ * q: a point of the curve (short form)
+ */
static void cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q)
{
limb a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0};
@@ -1374,12 +1423,13 @@ static void cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q)
limb e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1};
limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
- unsigned i, j;
+ unsigned int i, j;
memcpy(nqpqx, q, sizeof(limb) * 10);
for (i = 0; i < 32; ++i) {
u8 byte = n[31 - i];
+
for (j = 0; j < 8; ++j) {
const limb bit = byte >> 7;
@@ -1462,12 +1512,14 @@ struct other_stack {
* qmqp: short form, preserved
*
* On entry and exit, the absolute value of the limbs of all inputs and outputs
- * are < 2^26. */
+ * are < 2^26.
+ */
static void fmonty(struct other_stack *s,
limb *x2, limb *z2, /* output 2Q */
limb *x3, limb *z3, /* output Q + Q' */
limb *x, limb *z, /* input Q */
limb *xprime, limb *zprime, /* input Q' */
+
const limb *qmqp /* input Q - Q' */)
{
memcpy(s->origx, x, 10 * sizeof(limb));
@@ -1484,7 +1536,8 @@ static void fmonty(struct other_stack *s,
fproduct(s->xxprime, xprime, z);
/* |s->xxprime[i]| < 14*2^54: the largest product of two limbs will be <
* 2^(27+27) and fproduct adds together, at most, 14 of those products.
- * (Approximating that to 2^58 doesn't work out.) */
+ * (Approximating that to 2^58 doesn't work out.)
+ */
fproduct(s->zzprime, x, zprime);
/* |s->zzprime[i]| < 14*2^54 */
freduce_degree(s->xxprime);
@@ -1541,10 +1594,11 @@ static void fmonty(struct other_stack *s,
*
* resultx/resultz: the x coordinate of the resulting curve point (short form)
* n: a little endian, 32-byte number
- * q: a point of the curve (short form) */
+ * q: a point of the curve (short form)
+ */
static void cmult(struct other_stack *s, limb *resultx, limb *resultz, const u8 *n, const limb *q)
{
- unsigned i, j;
+ unsigned int i, j;
limb *nqpqx = s->a, *nqpqz = s->b, *nqx = s->c, *nqz = s->d, *t;
limb *nqpqx2 = s->e, *nqpqz2 = s->f, *nqx2 = s->g, *nqz2 = s->h;
@@ -1553,6 +1607,7 @@ static void cmult(struct other_stack *s, limb *resultx, limb *resultz, const u8
for (i = 0; i < 32; ++i) {
u8 byte = n[31 - i];
+
for (j = 0; j < 8; ++j) {
const limb bit = byte >> 7;
@@ -1599,6 +1654,7 @@ bool curve25519(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_P
#endif
{
struct other_stack *s = kzalloc(sizeof(struct other_stack), GFP_KERNEL);
+
if (unlikely(!s))
return false;
@@ -1619,6 +1675,7 @@ bool curve25519(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_P
bool curve25519_generate_public(u8 pub[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE])
{
static const u8 basepoint[CURVE25519_POINT_SIZE] __aligned(32) = { 9 };
+
return curve25519(pub, secret, basepoint);
}
#endif
diff --git a/src/device.c b/src/device.c
index 8a2eb0a..593a91c 100644
--- a/src/device.c
+++ b/src/device.c
@@ -37,7 +37,8 @@ static int open(struct net_device *dev)
/* TODO: when we merge to mainline, put this check near the ip_rt_send_redirect
* call of ip_forward in net/ipv4/ip_forward.c, similar to the current secpath
* check, rather than turning it off like this. This is just a stop gap solution
- * while we're an out of tree module. */
+ * while we're an out of tree module.
+ */
IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
}
@@ -159,7 +160,8 @@ static netdev_tx_t xmit(struct sk_buff *skb, struct net_device *dev)
continue;
/* We only need to keep the original dst around for icmp,
- * so at this point we're in a position to drop it. */
+ * so at this point we're in a position to drop it.
+ */
skb_dst_drop(skb);
__skb_queue_tail(&packets, skb);
@@ -167,7 +169,8 @@ static netdev_tx_t xmit(struct sk_buff *skb, struct net_device *dev)
spin_lock_bh(&peer->staged_packet_queue.lock);
/* If the queue is getting too big, we start removing the oldest packets until it's small again.
- * We do this before adding the new packet, so we don't remove GSO segments that are in excess. */
+ * We do this before adding the new packet, so we don't remove GSO segments that are in excess.
+ */
while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS)
dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue));
skb_queue_splice_tail(&packets, &peer->staged_packet_queue);
@@ -310,7 +313,8 @@ static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *t
list_add(&wg->device_list, &device_list);
/* We wait until the end to assign priv_destructor, so that register_netdevice doesn't
- * call it for us if it fails. */
+ * call it for us if it fails.
+ */
dev->priv_destructor = destruct;
pr_debug("%s: Interface created\n", dev->name);
diff --git a/src/hashtables.c b/src/hashtables.c
index a0c0c64..8d61f4c 100644
--- a/src/hashtables.c
+++ b/src/hashtables.c
@@ -7,7 +7,8 @@
static inline struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table, const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
{
/* siphash gives us a secure 64bit number based on a random key. Since the bits are
- * uniformly distributed, we can then mask off to get the bits we need. */
+ * uniformly distributed, we can then mask off to get the bits we need.
+ */
return &table->hashtable[siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key) & (HASH_SIZE(table->hashtable) - 1)];
}
@@ -52,7 +53,8 @@ struct wireguard_peer *pubkey_hashtable_lookup(struct pubkey_hashtable *table, c
static inline struct hlist_head *index_bucket(struct index_hashtable *table, const __le32 index)
{
/* Since the indices are random and thus all bits are uniformly distributed,
- * we can find its bucket simply by masking. */
+ * we can find its bucket simply by masking.
+ */
return &table->hashtable[(__force u32)index & (HASH_SIZE(table->hashtable) - 1)];
}
@@ -103,7 +105,8 @@ search_unused_slot:
}
/* Once we've found an unused slot, we lock it, and then double-check
- * that nobody else stole it from us. */
+ * that nobody else stole it from us.
+ */
spin_lock_bh(&table->lock);
hlist_for_each_entry_rcu_bh(existing_entry, index_bucket(table, entry->index), index_hash) {
if (existing_entry->index == entry->index) {
diff --git a/src/messages.h b/src/messages.h
index 0937942..927b487 100644
--- a/src/messages.h
+++ b/src/messages.h
@@ -122,7 +122,7 @@ enum message_alignments {
#define DATA_PACKET_HEAD_ROOM ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4)
enum {
- HANDSHAKE_DSCP = 0b10001000 /* AF41, plus 00 ECN */
+ HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */
};
#endif /* _WG_MESSAGES_H */
diff --git a/src/netlink.c b/src/netlink.c
index 60c58e0..fa8a766 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -199,7 +199,8 @@ static int get_device_dump(struct sk_buff *skb, struct netlink_callback *cb)
ret = 0;
/* If the last cursor was removed via list_del_init in peer_remove, then we just treat
* this the same as there being no more peers left. The reason is that seq_nr should
- * indicate to userspace that this isn't a coherent dump anyway, so they'll try again. */
+ * indicate to userspace that this isn't a coherent dump anyway, so they'll try again.
+ */
if (list_empty(&wg->peer_list) || (last_peer_cursor && list_empty(&last_peer_cursor->peer_list))) {
nla_nest_cancel(skb, peers_nest);
goto out;
@@ -236,7 +237,8 @@ out:
/* At this point, we can't really deal ourselves with safely zeroing out
* the private key material after usage. This will need an additional API
- * in the kernel for marking skbs as zero_on_free. */
+ * in the kernel for marking skbs as zero_on_free.
+ */
}
static int get_device_done(struct netlink_callback *cb)
@@ -312,7 +314,8 @@ static int set_peer(struct wireguard_device *wg, struct nlattr **attrs)
down_read(&wg->static_identity.lock);
if (wg->static_identity.has_identity && !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]), wg->static_identity.static_public, NOISE_PUBLIC_KEY_LEN)) {
/* We silently ignore peers that have the same public key as the device. The reason we do it silently
- * is that we'd like for people to be able to reuse the same set of API calls across peers. */
+ * is that we'd like for people to be able to reuse the same set of API calls across peers.
+ */
up_read(&wg->static_identity.lock);
ret = 0;
goto out;
@@ -420,7 +423,7 @@ static int set_device(struct sk_buff *skb, struct genl_info *info)
struct wireguard_peer *peer, *temp;
u8 public_key[NOISE_PUBLIC_KEY_LEN] = { 0 }, *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]);
/* We remove before setting, to prevent race, which means doing two 25519-genpub ops. */
- bool unused __attribute((unused)) = curve25519_generate_public(public_key, private_key);
+ __attribute((unused)) bool unused = curve25519_generate_public(public_key, private_key);
peer = pubkey_hashtable_lookup(&wg->peer_hashtable, public_key);
if (peer) {
diff --git a/src/noise.c b/src/noise.c
index 99c5f53..aab7f88 100644
--- a/src/noise.c
+++ b/src/noise.c
@@ -151,29 +151,34 @@ static void add_new_keypair(struct noise_keypairs *keypairs, struct noise_keypai
current_keypair = rcu_dereference_protected(keypairs->current_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
if (new_keypair->i_am_the_initiator) {
/* If we're the initiator, it means we've sent a handshake, and received
- * a confirmation response, which means this new keypair can now be used. */
+ * a confirmation response, which means this new keypair can now be used.
+ */
if (next_keypair) {
/* If there already was a next keypair pending, we demote it to be
* the previous keypair, and free the existing current.
* TODO: note that this means KCI can result in this transition. It
* would perhaps be more sound to always just get rid of the unused
* next keypair instead of putting it in the previous slot, but this
- * might be a bit less robust. Something to think about and decide on. */
+ * might be a bit less robust. Something to think about and decide on.
+ */
rcu_assign_pointer(keypairs->next_keypair, NULL);
rcu_assign_pointer(keypairs->previous_keypair, next_keypair);
noise_keypair_put(current_keypair);
} else /* If there wasn't an existing next keypair, we replace the
- * previous with the current one. */
+ * previous with the current one.
+ */
rcu_assign_pointer(keypairs->previous_keypair, current_keypair);
/* At this point we can get rid of the old previous keypair, and set up
- * the new keypair. */
+ * the new keypair.
+ */
noise_keypair_put(previous_keypair);
rcu_assign_pointer(keypairs->current_keypair, new_keypair);
} else {
/* If we're the responder, it means we can't use the new keypair until
* we receive confirmation via the first data packet, so we get rid of
* the existing previous one, the possibly existing next one, and slide
- * in the new next one. */
+ * in the new next one.
+ */
rcu_assign_pointer(keypairs->next_keypair, new_keypair);
noise_keypair_put(next_keypair);
rcu_assign_pointer(keypairs->previous_keypair, NULL);
@@ -201,7 +206,8 @@ bool noise_received_with_keypair(struct noise_keypairs *keypairs, struct noise_k
/* When we've finally received the confirmation, we slide the next
* into the current, the current into the previous, and get rid of
- * the old previous. */
+ * the old previous.
+ */
old_keypair = rcu_dereference_protected(keypairs->previous_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
rcu_assign_pointer(keypairs->previous_keypair, rcu_dereference_protected(keypairs->current_keypair, lockdep_is_held(&keypairs->keypair_update_lock)));
noise_keypair_put(old_keypair);
diff --git a/src/peer.c b/src/peer.c
index 6ddad26..1580262 100644
--- a/src/peer.c
+++ b/src/peer.c
@@ -74,7 +74,8 @@ struct wireguard_peer *peer_rcu_get(struct wireguard_peer *peer)
/* We have a separate "remove" function to get rid of the final reference because
* peer_list, clearing handshakes, and flushing all require mutexes which requires
- * sleeping, which must only be done from certain contexts. */
+ * sleeping, which must only be done from certain contexts.
+ */
void peer_remove(struct wireguard_peer *peer)
{
if (unlikely(!peer))
diff --git a/src/queueing.h b/src/queueing.h
index bf66c59..a385d67 100644
--- a/src/queueing.h
+++ b/src/queueing.h
@@ -97,7 +97,8 @@ static inline int cpumask_choose_online(int *stored_cpu, unsigned int id)
* atomic sequence number, do an increment-and-return, and then iterate through
* every possible CPU until we get to that index -- choose_cpu. However that's
* a bit slower, and it doesn't seem like this potential race actually introduces
- * any performance loss, so we live with it. */
+ * any performance loss, so we live with it.
+ */
static inline int cpumask_next_online(int *next)
{
int cpu = *next;
diff --git a/src/ratelimiter.c b/src/ratelimiter.c
index a9caf32..8a73dd1 100644
--- a/src/ratelimiter.c
+++ b/src/ratelimiter.c
@@ -100,7 +100,8 @@ bool ratelimiter_allow(struct sk_buff *skb, struct net *net)
bool ret;
/* Inspired by nft_limit.c, but this is actually a slightly different
* algorithm. Namely, we incorporate the burst as part of the maximum
- * tokens, rather than as part of the rate. */
+ * tokens, rather than as part of the rate.
+ */
spin_lock(&entry->lock);
now = ktime_get_ns();
tokens = min_t(u64, TOKEN_MAX, entry->tokens + now - entry->last_time_ns);
@@ -149,7 +150,8 @@ int ratelimiter_init(void)
/* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,
* but what it shares in common is that it uses a massive hashtable. So,
* we borrow their wisdom about good table sizes on different systems
- * dependent on RAM. This calculation here comes from there. */
+ * dependent on RAM. This calculation here comes from there.
+ */
table_size = (totalram_pages > (1U << 30) / PAGE_SIZE) ? 8192 : max_t(unsigned long, 16, roundup_pow_of_two((totalram_pages << PAGE_SHIFT) / (1U << 14) / sizeof(struct hlist_head)));
max_entries = table_size * 8;
diff --git a/src/receive.c b/src/receive.c
index 923c711..36502dc 100644
--- a/src/receive.c
+++ b/src/receive.c
@@ -141,7 +141,8 @@ static void receive_handshake_packet(struct wireguard_device *wg, struct sk_buff
/* Calling this function will either send any existing packets in the queue
* and not send a keepalive, which is the best case. Or, if there's nothing
* in the queue, it will send a keepalive, in order to give immediate
- * confirmation of the session. */
+ * confirmation of the session.
+ */
packet_send_keepalive(peer);
}
break;
@@ -214,7 +215,8 @@ static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key *
/* We ensure that the network header is part of the packet before we
* call skb_cow_data, so that there's no chance that data is removed
- * from the skb, so that later we can extract the original endpoint. */
+ * from the skb, so that later we can extract the original endpoint.
+ */
offset = skb->data - skb_network_header(skb);
skb_push(skb, offset);
num_frags = skb_cow_data(skb, 0, &trailer);
@@ -231,7 +233,8 @@ static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key *
return false;
/* Another ugly situation of pushing and pulling the header so as to
- * keep endpoint information intact. */
+ * keep endpoint information intact.
+ */
skb_push(skb, offset);
if (pskb_trim(skb, skb->len - noise_encrypted_len(0)))
return false;
@@ -411,6 +414,7 @@ void packet_decrypt_worker(struct work_struct *work)
while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
enum packet_state state = likely(skb_decrypt(skb, &PACKET_CB(skb)->keypair->receiving)) ? PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
+
queue_enqueue_per_peer(&PACKET_PEER(skb)->rx_queue, skb, state);
}
}
diff --git a/src/send.c b/src/send.c
index 9e0f635..180d909 100644
--- a/src/send.c
+++ b/src/send.c
@@ -52,7 +52,8 @@ void packet_send_queued_handshake_initiation(struct wireguard_peer *peer, bool i
peer->timer_handshake_attempts = 0;
/* First checking the timestamp here is just an optimization; it will
- * be caught while properly locked inside the actual work queue. */
+ * be caught while properly locked inside the actual work queue.
+ */
if (!time_is_before_jiffies64(peer->last_sent_handshake + REKEY_TIMEOUT))
return;
@@ -110,7 +111,8 @@ static inline unsigned int skb_padding(struct sk_buff *skb)
/* We do this modulo business with the MTU, just in case the networking layer
* gives us a packet that's bigger than the MTU. Since we support GSO, this
* isn't strictly necessary, but it's better to be cautious here, especially
- * if that code ever changes. */
+ * if that code ever changes.
+ */
unsigned int last_unit = skb->len % skb->dev->mtu;
unsigned int padded_size = (last_unit + MESSAGE_PADDING_MULTIPLE - 1) & ~(MESSAGE_PADDING_MULTIPLE - 1);
@@ -302,7 +304,8 @@ void packet_send_staged_packets(struct wireguard_peer *peer)
/* After we know we have a somewhat valid key, we now try to assign nonces to
* all of the packets in the queue. If we can't assign nonces for all of them,
- * we just consider it a failure and wait for the next handshake. */
+ * we just consider it a failure and wait for the next handshake.
+ */
skb_queue_walk(&packets, skb) {
PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0 /* No outer TOS: no leak. TODO: should we use flowi->tos as outer? */, ip_hdr(skb), skb);
PACKET_CB(skb)->nonce = atomic64_inc_return(&key->counter.counter) - 1;
@@ -322,18 +325,21 @@ out_nokey:
noise_keypair_put(keypair);
/* We orphan the packets if we're waiting on a handshake, so that they
- * don't block a socket's pool. */
+ * don't block a socket's pool.
+ */
skb_queue_walk(&packets, skb)
skb_orphan(skb);
/* Then we put them back on the top of the queue. We're not too concerned about
* accidentally getting things a little out of order if packets are being added
* really fast, because this queue is for before packets can even be sent and
- * it's small anyway. */
+ * it's small anyway.
+ */
spin_lock_bh(&peer->staged_packet_queue.lock);
skb_queue_splice(&packets, &peer->staged_packet_queue);
spin_unlock_bh(&peer->staged_packet_queue.lock);
/* If we're exiting because there's something wrong with the key, it means
- * we should initiate a new handshake. */
+ * we should initiate a new handshake.
+ */
packet_send_queued_handshake_initiation(peer, false);
}
diff --git a/src/socket.c b/src/socket.c
index d2e80d9..1ce74cd 100644
--- a/src/socket.c
+++ b/src/socket.c
@@ -242,7 +242,8 @@ void socket_set_peer_endpoint(struct wireguard_peer *peer, const struct endpoint
/* First we check unlocked, in order to optimize, since it's pretty rare
* that an endpoint will change. If we happen to be mid-write, and two
* CPUs wind up writing the same thing or something slightly different,
- * it doesn't really matter much either. */
+ * it doesn't really matter much either.
+ */
if (endpoint_eq(endpoint, &peer->endpoint))
return;
write_lock_bh(&peer->endpoint_lock);
diff --git a/src/timers.c b/src/timers.c
index 53a082b..17610b6 100644
--- a/src/timers.c
+++ b/src/timers.c
@@ -40,11 +40,13 @@ static void expired_retransmit_handshake(unsigned long ptr)
if (likely(timers_active(peer)))
del_timer(&peer->timer_send_keepalive);
/* We drop all packets without a keypair and don't try again,
- * if we try unsuccessfully for too long to make a handshake. */
+ * if we try unsuccessfully for too long to make a handshake.
+ */
skb_queue_purge(&peer->staged_packet_queue);
/* We set a timer for destroying any residue that might be left
- * of a partial exchange. */
+ * of a partial exchange.
+ */
if (likely(timers_active(peer)) && !timer_pending(&peer->timer_zero_key_material))
mod_timer(&peer->timer_zero_key_material, jiffies + (REJECT_AFTER_TIME * 3));
} else {