diff options
author | Jason A. Donenfeld <Jason@zx2c4.com> | 2017-03-20 13:04:46 +0100 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2017-03-20 14:02:31 +0100 |
commit | 1cf6ef7e7f4b8882bd289ec254cd83731a7e11f3 (patch) | |
tree | 9f0bf9a3c504c10223f62a0eb13de429549d984e | |
parent | 233e5b29b1ac39a86c15f0ece862b88965b3901b (diff) |
curve25519: do dispatcher in C instead of asm, since shlx requires BMI2 (Haswell or newer)
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
-rw-r--r-- | src/crypto/curve25519-avx-x86_64.S | 153 | ||||
-rw-r--r-- | src/crypto/curve25519.c | 82 |
2 files changed, 73 insertions, 162 deletions
diff --git a/src/crypto/curve25519-avx-x86_64.S b/src/crypto/curve25519-avx-x86_64.S index bc2796d..bd660ef 100644 --- a/src/crypto/curve25519-avx-x86_64.S +++ b/src/crypto/curve25519-avx-x86_64.S @@ -3484,156 +3484,3 @@ ENTRY(curve25519_sandy2x_fe51_invert) popq %rbp ret ENDPROC(curve25519_sandy2x_fe51_invert) - -ENTRY(curve25519_sandy2x) - pushq %rbx - movq %rdi, %rbx - subq $368, %rsp - movq (%rsi), %rax - leaq 128(%rsp), %rdi - movq %rax, (%rsp) - movq 8(%rsi), %rax - andb $-8, (%rsp) - movq %rax, 8(%rsp) - movq 16(%rsi), %rax - movq %rax, 16(%rsp) - movq 24(%rsi), %rax - movq %rdx, %rsi - movq %rax, 24(%rsp) - shrq $56, %rax - andl $127, %eax - orl $64, %eax - movb %al, 31(%rsp) - call curve25519_sandy2x_fe_frombytes - leaq 128(%rsp), %rdi - movq %rsp, %rsi - call curve25519_sandy2x_ladder - movl $26, %eax - shlx %rax, 296(%rsp), %rax - leaq 80(%rsp), %rsi - addq 288(%rsp), %rax - movq %rsi, %rdi - movq %rax, 80(%rsp) - movl $26, %eax - shlx %rax, 312(%rsp), %rax - addq 304(%rsp), %rax - movq %rax, 88(%rsp) - movl $26, %eax - shlx %rax, 328(%rsp), %rax - addq 320(%rsp), %rax - movq %rax, 96(%rsp) - movl $26, %eax - shlx %rax, 344(%rsp), %rax - addq 336(%rsp), %rax - movq %rax, 104(%rsp) - movl $26, %eax - shlx %rax, 360(%rsp), %rax - addq 352(%rsp), %rax - movq %rax, 112(%rsp) - movl $26, %eax - shlx %rax, 216(%rsp), %rax - addq 208(%rsp), %rax - movq %rax, 32(%rsp) - movl $26, %eax - shlx %rax, 232(%rsp), %rax - addq 224(%rsp), %rax - movq %rax, 40(%rsp) - movl $26, %eax - shlx %rax, 248(%rsp), %rax - addq 240(%rsp), %rax - movq %rax, 48(%rsp) - movl $26, %eax - shlx %rax, 264(%rsp), %rax - addq 256(%rsp), %rax - movq %rax, 56(%rsp) - movl $26, %eax - shlx %rax, 280(%rsp), %rax - addq 272(%rsp), %rax - movq %rax, 64(%rsp) - call curve25519_sandy2x_fe51_invert - leaq 32(%rsp), %rsi - leaq 80(%rsp), %rdx - movq %rsi, %rdi - call curve25519_sandy2x_fe51_mul - leaq 32(%rsp), %rsi - movq %rbx, %rdi - call curve25519_sandy2x_fe51_pack - addq $368, %rsp - popq %rbx 
- ret -ENDPROC(curve25519_sandy2x) - -ENTRY(curve25519_sandy2x_base) - pushq %rbx - movq %rdi, %rbx - subq $368, %rsp - movq (%rsi), %rax - leaq 128(%rsp), %rdi - movq %rax, (%rsp) - movq 8(%rsi), %rax - andb $-8, (%rsp) - movq %rax, 8(%rsp) - movq 16(%rsi), %rax - movq %rax, 16(%rsp) - movq 24(%rsi), %rax - movq %rsp, %rsi - movq %rax, 24(%rsp) - shrq $56, %rax - andl $127, %eax - orl $64, %eax - movb %al, 31(%rsp) - call curve25519_sandy2x_ladder_base - movl $26, %eax - shlx %rax, 216(%rsp), %rax - leaq 80(%rsp), %rsi - addq 208(%rsp), %rax - movq %rsi, %rdi - movq %rax, 80(%rsp) - movl $26, %eax - shlx %rax, 232(%rsp), %rax - addq 224(%rsp), %rax - movq %rax, 88(%rsp) - movl $26, %eax - shlx %rax, 248(%rsp), %rax - addq 240(%rsp), %rax - movq %rax, 96(%rsp) - movl $26, %eax - shlx %rax, 264(%rsp), %rax - addq 256(%rsp), %rax - movq %rax, 104(%rsp) - movl $26, %eax - shlx %rax, 280(%rsp), %rax - addq 272(%rsp), %rax - movq %rax, 112(%rsp) - movl $26, %eax - shlx %rax, 136(%rsp), %rax - addq 128(%rsp), %rax - movq %rax, 32(%rsp) - movl $26, %eax - shlx %rax, 152(%rsp), %rax - addq 144(%rsp), %rax - movq %rax, 40(%rsp) - movl $26, %eax - shlx %rax, 168(%rsp), %rax - addq 160(%rsp), %rax - movq %rax, 48(%rsp) - movl $26, %eax - shlx %rax, 184(%rsp), %rax - addq 176(%rsp), %rax - movq %rax, 56(%rsp) - movl $26, %eax - shlx %rax, 200(%rsp), %rax - addq 192(%rsp), %rax - movq %rax, 64(%rsp) - call curve25519_sandy2x_fe51_invert - leaq 32(%rsp), %rsi - leaq 80(%rsp), %rdx - movq %rsi, %rdi - call curve25519_sandy2x_fe51_mul - leaq 32(%rsp), %rsi - movq %rbx, %rdi - call curve25519_sandy2x_fe51_pack - addq $368, %rsp - popq %rbx - ret -ENDPROC(curve25519_sandy2x_base) diff --git a/src/crypto/curve25519.c b/src/crypto/curve25519.c index 539c31a..d9cdd8d 100644 --- a/src/crypto/curve25519.c +++ b/src/crypto/curve25519.c @@ -10,6 +10,13 @@ #include <linux/random.h> #include <crypto/algapi.h> +static __always_inline void normalize_secret(u8 secret[CURVE25519_POINT_SIZE]) +{ 
+ secret[0] &= 248; + secret[31] &= 127; + secret[31] |= 64; +} + #ifdef CONFIG_X86_64 #include <asm/cpufeature.h> #include <asm/processor.h> @@ -20,19 +27,76 @@ void curve25519_fpu_init(void) { curve25519_use_avx = boot_cpu_has(X86_FEATURE_AVX); } -asmlinkage void curve25519_sandy2x(u8 *q, const u8 *n, const u8 *p); -asmlinkage void curve25519_sandy2x_base(u8 *q, const u8 *n); -#else -void curve25519_fpu_init(void) { } -#endif -static __always_inline void normalize_secret(u8 secret[CURVE25519_POINT_SIZE]) +typedef u64 fe[10]; +typedef u64 fe51[5]; +asmlinkage void curve25519_sandy2x_ladder(fe *, const u8 *); +asmlinkage void curve25519_sandy2x_ladder_base(fe *, const u8 *); +asmlinkage void curve25519_sandy2x_fe_frombytes(fe, const u8 *); +asmlinkage void curve25519_sandy2x_fe51_pack(u8 *, const fe51 *); +asmlinkage void curve25519_sandy2x_fe51_mul(fe51 *, const fe51 *, const fe51 *); +asmlinkage void curve25519_sandy2x_fe51_invert(fe51 *, const fe51 *); + +static void curve25519_sandy2x(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE]) { - secret[0] &= 248; - secret[31] &= 127; - secret[31] |= 64; + u8 e[32]; + fe var[3]; + fe51 x_51, z_51; + memcpy(e, secret, 32); + normalize_secret(e); +#define x1 var[0] +#define x2 var[1] +#define z2 var[2] + curve25519_sandy2x_fe_frombytes(x1, basepoint); + curve25519_sandy2x_ladder(var, e); + z_51[0] = (z2[1] << 26) + z2[0]; + z_51[1] = (z2[3] << 26) + z2[2]; + z_51[2] = (z2[5] << 26) + z2[4]; + z_51[3] = (z2[7] << 26) + z2[6]; + z_51[4] = (z2[9] << 26) + z2[8]; + x_51[0] = (x2[1] << 26) + x2[0]; + x_51[1] = (x2[3] << 26) + x2[2]; + x_51[2] = (x2[5] << 26) + x2[4]; + x_51[3] = (x2[7] << 26) + x2[6]; + x_51[4] = (x2[9] << 26) + x2[8]; +#undef x1 +#undef x2 +#undef z2 + curve25519_sandy2x_fe51_invert(&z_51, &z_51); + curve25519_sandy2x_fe51_mul(&x_51, &x_51, &z_51); + curve25519_sandy2x_fe51_pack(mypublic, &x_51); } +static void curve25519_sandy2x_base(u8 
pub[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE]) +{ + u8 e[32]; + fe var[3]; + fe51 x_51, z_51; + memcpy(e, secret, 32); + normalize_secret(e); + curve25519_sandy2x_ladder_base(var, e); +#define x2 var[0] +#define z2 var[1] + z_51[0] = (z2[1] << 26) + z2[0]; + z_51[1] = (z2[3] << 26) + z2[2]; + z_51[2] = (z2[5] << 26) + z2[4]; + z_51[3] = (z2[7] << 26) + z2[6]; + z_51[4] = (z2[9] << 26) + z2[8]; + x_51[0] = (x2[1] << 26) + x2[0]; + x_51[1] = (x2[3] << 26) + x2[2]; + x_51[2] = (x2[5] << 26) + x2[4]; + x_51[3] = (x2[7] << 26) + x2[6]; + x_51[4] = (x2[9] << 26) + x2[8]; +#undef x2 +#undef z2 + curve25519_sandy2x_fe51_invert(&z_51, &z_51); + curve25519_sandy2x_fe51_mul(&x_51, &x_51, &z_51); + curve25519_sandy2x_fe51_pack(pub, &x_51); +} +#else +void curve25519_fpu_init(void) { } +#endif + #ifdef __SIZEOF_INT128__ typedef u64 limb; typedef limb felem[5]; |