diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/crypto/chacha20-ssse3-x86_64.S | 102 (102 insertions, 0 deletions)
-rw-r--r-- | src/crypto/chacha20poly1305.c | 76 (55 insertions, 21 deletions)
2 files changed, 157 insertions, 21 deletions
diff --git a/src/crypto/chacha20-ssse3-x86_64.S b/src/crypto/chacha20-ssse3-x86_64.S index d7600b3..be4b9b7 100644 --- a/src/crypto/chacha20-ssse3-x86_64.S +++ b/src/crypto/chacha20-ssse3-x86_64.S @@ -2,6 +2,7 @@ * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions * * Copyright (C) 2015 Martin Willi + * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,6 +18,7 @@ ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 CTRINC: .octa 0x00000003000000020000000100000000 +CONST: .ascii "expand 32-byte k" .text @@ -625,3 +627,103 @@ ENTRY(chacha20_asm_4block_xor_ssse3) mov %r11,%rsp ret ENDPROC(chacha20_asm_4block_xor_ssse3) + +ENTRY(hchacha20_asm_ssse3) + # %rdi: 32 byte output key, o + # %rsi: 16 byte nonce, n + # %rdx: 32 byte input key, i + + # x0 = constant + movdqa CONST(%rip),%xmm0 + # x1, x2 = i + movdqu 0x00(%rdx),%xmm1 + movdqu 0x10(%rdx),%xmm2 + # x3 = n + movdqu 0x00(%rsi),%xmm3 + + movdqa %xmm0,%xmm8 + movdqa %xmm1,%xmm9 + movdqa %xmm2,%xmm10 + movdqa %xmm3,%xmm11 + movdqa ROT8(%rip),%xmm4 + movdqa ROT16(%rip),%xmm5 + + mov $10,%ecx + +.Lhdoubleround: + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm5,%xmm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm6 + pslld $12,%xmm6 + psrld $20,%xmm1 + por %xmm6,%xmm1 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm4,%xmm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm7 + pslld $7,%xmm7 + psrld $25,%xmm1 + por %xmm7,%xmm1 + + # x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + pshufd $0x39,%xmm1,%xmm1 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + pshufd $0x4e,%xmm2,%xmm2 + # x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + 
pshufd $0x93,%xmm3,%xmm3 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm5,%xmm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm6 + pslld $12,%xmm6 + psrld $20,%xmm1 + por %xmm6,%xmm1 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm4,%xmm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm7 + pslld $7,%xmm7 + psrld $25,%xmm1 + por %xmm7,%xmm1 + + # x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + pshufd $0x93,%xmm1,%xmm1 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + pshufd $0x4e,%xmm2,%xmm2 + # x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + pshufd $0x39,%xmm3,%xmm3 + + dec %ecx + jnz .Lhdoubleround + + # o0 = x0 + movdqu %xmm0,0x00(%rdi) + # o1 = x3 + movdqu %xmm3,0x10(%rdi) + ret +ENDPROC(hchacha20_asm_ssse3) diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c index d0fbe1c..611008e 100644 --- a/src/crypto/chacha20poly1305.c +++ b/src/crypto/chacha20poly1305.c @@ -16,6 +16,7 @@ #include <asm/cpufeature.h> #include <asm/processor.h> #ifdef CONFIG_AS_SSSE3 +asmlinkage void hchacha20_asm_ssse3(u8 *derived_key, const u8 *nonce, const u8 *key); asmlinkage void chacha20_asm_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src); asmlinkage void chacha20_asm_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src); #endif @@ -140,7 +141,7 @@ static void chacha20_generic_block(struct chacha20_ctx *ctx, void *stream) static const char constant[16] = "expand 32-byte k"; -static void hchacha20(u8 derived_key[CHACHA20POLY1305_KEYLEN], const u8 nonce[16], const u8 key[CHACHA20POLY1305_KEYLEN]) +static void hchacha20_generic(u8 derived_key[CHACHA20POLY1305_KEYLEN], const u8 nonce[16], const u8 key[CHACHA20POLY1305_KEYLEN]) { u32 x[CHACHA20_BLOCK_SIZE / sizeof(u32)]; __le32 *out = (__force __le32 *)derived_key; @@ -215,6 +216,22 @@ static void hchacha20(u8 derived_key[CHACHA20POLY1305_KEYLEN], 
const u8 nonce[16 out[7] = cpu_to_le32(x[15]); } +static inline void hchacha20(u8 derived_key[CHACHA20POLY1305_KEYLEN], const u8 nonce[16], const u8 key[CHACHA20POLY1305_KEYLEN], bool have_simd) +{ + if (!have_simd) + goto no_simd; + +#if defined(CONFIG_X86_64) && defined(CONFIG_AS_SSSE3) + if (chacha20poly1305_use_ssse3) { + hchacha20_asm_ssse3(derived_key, nonce, key); + return; + } +#endif + +no_simd: + hchacha20_generic(derived_key, nonce, key); +} + static void chacha20_keysetup(struct chacha20_ctx *ctx, const u8 key[CHACHA20_KEY_SIZE], const u8 nonce[sizeof(u64)]) { ctx->state[0] = le32_to_cpuvp(constant + 0); @@ -464,7 +481,6 @@ static void poly1305_update(struct poly1305_ctx *ctx, const u8 *src, unsigned in if (ctx->buflen == POLY1305_BLOCK_SIZE) { #ifdef CONFIG_X86_64 - if (have_simd && chacha20poly1305_use_sse2) poly1305_simd_blocks(ctx, ctx->buf, POLY1305_BLOCK_SIZE); else @@ -476,7 +492,6 @@ static void poly1305_update(struct poly1305_ctx *ctx, const u8 *src, unsigned in if (likely(srclen >= POLY1305_BLOCK_SIZE)) { #ifdef CONFIG_X86_64 - if (have_simd && chacha20poly1305_use_sse2) bytes = poly1305_simd_blocks(ctx, src, srclen); else @@ -568,16 +583,16 @@ static struct blkcipher_desc chacha20_desc = { .tfm = &chacha20_cipher }; -void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, - const u8 *ad, const size_t ad_len, - const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]) +static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], + bool have_simd) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; u8 block0[CHACHA20_BLOCK_SIZE] = { 0 }; __le64 len; __le64 le_nonce = cpu_to_le64(nonce); - bool have_simd = chacha20poly1305_init_simd(); chacha20_keysetup(&chacha20_state, key, (u8 *)&le_nonce); @@ -603,7 +618,15 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t 
src_len, memzero_explicit(&poly1305_state, sizeof(poly1305_state)); memzero_explicit(&chacha20_state, sizeof(chacha20_state)); +} +void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]) +{ + bool have_simd; + have_simd = chacha20poly1305_init_simd(); + __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd); chacha20poly1305_deinit_simd(have_simd); } @@ -665,9 +688,10 @@ err: return !ret; } -bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, - const u8 *ad, const size_t ad_len, - const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]) +static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], + bool have_simd) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; @@ -677,13 +701,10 @@ bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, size_t dst_len; __le64 len; __le64 le_nonce = cpu_to_le64(nonce); - bool have_simd; if (unlikely(src_len < POLY1305_MAC_SIZE)) return false; - have_simd = chacha20poly1305_init_simd(); - chacha20_keysetup(&chacha20_state, key, (u8 *)&le_nonce); chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd); @@ -713,10 +734,20 @@ bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, memzero_explicit(&chacha20_state, sizeof(chacha20_state)); - chacha20poly1305_deinit_simd(have_simd); return !ret; } +bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]) +{ + bool have_simd, ret; + have_simd = chacha20poly1305_init_simd(); + ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd); + chacha20poly1305_deinit_simd(have_simd); + 
return ret; +} + bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]) @@ -792,10 +823,12 @@ void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 nonce[XCHACHA20POLY1305_NONCELEN], const u8 key[CHACHA20POLY1305_KEYLEN]) { - u8 derived_key[CHACHA20POLY1305_KEYLEN]; - hchacha20(derived_key, nonce, key); - chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpuvp(nonce + 16), derived_key); + bool have_simd = chacha20poly1305_init_simd(); + u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16); + hchacha20(derived_key, nonce, key, have_simd); + __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpuvp(nonce + 16), derived_key, have_simd); memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN); + chacha20poly1305_deinit_simd(have_simd); } bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, @@ -803,11 +836,12 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 nonce[XCHACHA20POLY1305_NONCELEN], const u8 key[CHACHA20POLY1305_KEYLEN]) { - u8 derived_key[CHACHA20POLY1305_KEYLEN]; - bool ret; - hchacha20(derived_key, nonce, key); - ret = chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpuvp(nonce + 16), derived_key); + bool ret, have_simd = chacha20poly1305_init_simd(); + u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16); + hchacha20(derived_key, nonce, key, have_simd); + ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpuvp(nonce + 16), derived_key, have_simd); memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN); + chacha20poly1305_deinit_simd(have_simd); return ret; } |