diff options
author | Samuel Neves <sneves@dei.uc.pt> | 2018-07-28 06:09:52 +0100 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2018-07-28 14:51:19 +0200 |
commit | 06bab82a51b769ca96297d09ed96afcbcfb36189 (patch) | |
tree | 24bb343c5933398d320b61ea1eea7a77cb0e5063 | |
parent | 2ff62d8431b18db111c126dd2ef26e5417da1c62 (diff) |
curve25519-x86_64: simplify the final reduction by adding 19 beforehand
Correctness can be quickly verified with the following z3py script:
>>> from z3 import *
>>> x = BitVec("x", 256) # any 256-bit value
>>> ref = URem(x, 2**255 - 19) # correct value
>>> t = Extract(255, 255, x); x &= 2**255 - 1; # btrq $63, %3
>>> u = If(t != 0, BitVecVal(38, 256), BitVecVal(19, 256)) # cmovncl %k5, %k4
>>> x += u # addq %4, %0; adcq $0, %1; adcq $0, %2; adcq $0, %3;
>>> t = Extract(255, 255, x); x &= 2**255 - 1; # btrq $63, %3
>>> u = If(t != 0, BitVecVal(0, 256), BitVecVal(19, 256)) # cmovncl %k5, %k4
>>> x -= u # subq %4, %0; sbbq $0, %1; sbbq $0, %2; sbbq $0, %3;
>>> prove(x == ref)
proved
Change inspired by Andy Polyakov's OpenSSL implementation.
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
-rw-r--r-- | src/crypto/curve25519-x86_64.h | 66 |
1 file changed, 26 insertions, 40 deletions
diff --git a/src/crypto/curve25519-x86_64.h b/src/crypto/curve25519-x86_64.h index fba4f00..29204de 100644 --- a/src/crypto/curve25519-x86_64.h +++ b/src/crypto/curve25519-x86_64.h @@ -1609,48 +1609,34 @@ static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) */ static __always_inline void fred_eltfp25519_1w(u64 *const c) { + u64 tmp0, tmp1; asm volatile( - /* First, obtains a number less than 2^255. */ - "btrq $63, 24(%0) ;" - "sbbl %%ecx, %%ecx ;" - "andq $19, %%rcx ;" - "addq %%rcx, (%0) ;" - "adcq $0, 8(%0) ;" - "adcq $0, 16(%0) ;" - "adcq $0, 24(%0) ;" - - "btrq $63, 24(%0) ;" - "sbbl %%ecx, %%ecx ;" - "andq $19, %%rcx ;" - "addq %%rcx, (%0) ;" - "adcq $0, 8(%0) ;" - "adcq $0, 16(%0) ;" - "adcq $0, 24(%0) ;" - - /* Then, in case the number fall into [2^255-19, 2^255-1] */ - "cmpq $-19, (%0) ;" - "setaeb %%al ;" - "cmpq $-1, 8(%0) ;" - "setzb %%bl ;" - "cmpq $-1, 16(%0) ;" - "setzb %%cl ;" - "movq 24(%0), %%rdx ;" - "addq $1, %%rdx ;" - "shrq $63, %%rdx ;" - "andb %%bl, %%al ;" - "andb %%dl, %%cl ;" - "test %%cl, %%al ;" - "movl $0, %%eax ;" - "movl $19, %%ecx ;" - "cmovnz %%rcx, %%rax ;" - "addq %%rax, (%0) ;" - "adcq $0, 8(%0) ;" - "adcq $0, 16(%0) ;" - "adcq $0, 24(%0) ;" - "btrq $63, 24(%0) ;" + "movl $19, %k5 ;" + "movl $38, %k4 ;" + + "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */ + "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */ + + /* Add either 19 or 38 to c */ + "addq %4, %0 ;" + "adcq $0, %1 ;" + "adcq $0, %2 ;" + "adcq $0, %3 ;" + + /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */ + "movl $0, %k4 ;" + "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */ + "btrq $63, %3 ;" /* Clear bit 255 */ + + /* Subtract 19 if necessary */ + "subq %4, %0 ;" + "sbbq $0, %1 ;" + "sbbq $0, %2 ;" + "sbbq $0, %3 ;" + + : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "=r"(tmp0), "=r"(tmp1) : - : "r"(c) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx"); + : "memory", "cc"); } static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py) |