author     Samuel Neves <sneves@dei.uc.pt>        2018-07-28 06:09:52 +0100
committer  Jason A. Donenfeld <Jason@zx2c4.com>   2018-07-28 14:51:19 +0200
commit     06bab82a51b769ca96297d09ed96afcbcfb36189 (patch)
tree       24bb343c5933398d320b61ea1eea7a77cb0e5063
parent     2ff62d8431b18db111c126dd2ef26e5417da1c62 (diff)
curve25519-x86_64: simplify the final reduction by adding 19 beforehand
Correctness can be quickly verified with the following z3py script:

    >>> from z3 import *
    >>> x = BitVec("x", 256) # any 256-bit value
    >>> ref = URem(x, 2**255 - 19) # correct value
    >>> t = Extract(255, 255, x); x &= 2**255 - 1; # btrq $63, %3
    >>> u = If(t != 0, BitVecVal(38, 256), BitVecVal(19, 256)) # cmovncl %k5, %k4
    >>> x += u # addq %4, %0; adcq $0, %1; adcq $0, %2; adcq $0, %3;
    >>> t = Extract(255, 255, x); x &= 2**255 - 1; # btrq $63, %3
    >>> u = If(t != 0, BitVecVal(0, 256), BitVecVal(19, 256)) # cmovncl %k5, %k4
    >>> x -= u # subq %4, %0; sbbq $0, %1; sbbq $0, %2; sbbq $0, %3;
    >>> prove(x == ref)
    proved

Change inspired by Andy Polyakov's OpenSSL implementation.

Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
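For reference, the logic the new assembly implements can also be written as a
short portable C sketch. This is purely illustrative and not part of the
patch: the helper name fred_ref is made up here, and it assumes the same
representation the patched function uses, namely a 256-bit value held in four
little-endian 64-bit limbs, reduced modulo p = 2^255 - 19.

    #include <stdint.h>

    /* Illustrative C model of the patched two-step reduction. */
    static void fred_ref(uint64_t c[4])
    {
    	uint64_t bit255, carry, borrow, t;
    	int i;

    	/* btrq $63, %3: read bit 255, then clear it. */
    	bit255 = c[3] >> 63;
    	c[3] &= 0x7fffffffffffffffULL;

    	/* Add 19 unconditionally, plus 19 more when bit 255 was set,
    	 * since 2^255 == 19 (mod p).  Mirrors cmovncl %k5, %k4. */
    	carry = 19 + 19 * bit255;
    	for (i = 0; i < 4; ++i) {
    		c[i] += carry;
    		carry = c[i] < carry; /* carry out of this limb */
    	}

    	/* If bit 255 is now set, c was >= p, so the +19 above already
    	 * performed the reduction; otherwise undo it by subtracting 19.
    	 * Mirrors cmovnsl %k5, %k4 and the second btrq $63, %3. */
    	bit255 = c[3] >> 63;
    	c[3] &= 0x7fffffffffffffffULL;
    	borrow = 19 * (1 - bit255);
    	for (i = 0; i < 4; ++i) {
    		t = c[i] - borrow;
    		borrow = c[i] < borrow; /* borrow into the next limb */
    		c[i] = t;
    	}
    }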
-rw-r--r--  src/crypto/curve25519-x86_64.h  |  66
1 file changed, 26 insertions(+), 40 deletions(-)
diff --git a/src/crypto/curve25519-x86_64.h b/src/crypto/curve25519-x86_64.h
index fba4f00..29204de 100644
--- a/src/crypto/curve25519-x86_64.h
+++ b/src/crypto/curve25519-x86_64.h
@@ -1609,48 +1609,34 @@ static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
*/
static __always_inline void fred_eltfp25519_1w(u64 *const c)
{
+ u64 tmp0, tmp1;
asm volatile(
- /* First, obtains a number less than 2^255. */
- "btrq $63, 24(%0) ;"
- "sbbl %%ecx, %%ecx ;"
- "andq $19, %%rcx ;"
- "addq %%rcx, (%0) ;"
- "adcq $0, 8(%0) ;"
- "adcq $0, 16(%0) ;"
- "adcq $0, 24(%0) ;"
-
- "btrq $63, 24(%0) ;"
- "sbbl %%ecx, %%ecx ;"
- "andq $19, %%rcx ;"
- "addq %%rcx, (%0) ;"
- "adcq $0, 8(%0) ;"
- "adcq $0, 16(%0) ;"
- "adcq $0, 24(%0) ;"
-
- /* Then, in case the number fall into [2^255-19, 2^255-1] */
- "cmpq $-19, (%0) ;"
- "setaeb %%al ;"
- "cmpq $-1, 8(%0) ;"
- "setzb %%bl ;"
- "cmpq $-1, 16(%0) ;"
- "setzb %%cl ;"
- "movq 24(%0), %%rdx ;"
- "addq $1, %%rdx ;"
- "shrq $63, %%rdx ;"
- "andb %%bl, %%al ;"
- "andb %%dl, %%cl ;"
- "test %%cl, %%al ;"
- "movl $0, %%eax ;"
- "movl $19, %%ecx ;"
- "cmovnz %%rcx, %%rax ;"
- "addq %%rax, (%0) ;"
- "adcq $0, 8(%0) ;"
- "adcq $0, 16(%0) ;"
- "adcq $0, 24(%0) ;"
- "btrq $63, 24(%0) ;"
+ "movl $19, %k5 ;"
+ "movl $38, %k4 ;"
+
+ "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */
+ "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */
+
+ /* Add either 19 or 38 to c */
+ "addq %4, %0 ;"
+ "adcq $0, %1 ;"
+ "adcq $0, %2 ;"
+ "adcq $0, %3 ;"
+
+ /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */
+ "movl $0, %k4 ;"
+ "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */
+ "btrq $63, %3 ;" /* Clear bit 255 */
+
+ /* Subtract 19 if necessary */
+ "subq %4, %0 ;"
+ "sbbq $0, %1 ;"
+ "sbbq $0, %2 ;"
+ "sbbq $0, %3 ;"
+
+ : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "=r"(tmp0), "=r"(tmp1)
:
- : "r"(c)
- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx");
+ : "memory", "cc");
}
static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py)
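As a quick sanity check, the sketch above can be exercised on a few boundary
values whose residues modulo p = 2^255 - 19 are easy to compute by hand. Like
fred_ref itself, this harness is illustrative and not part of the patch.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Uses fred_ref() from the sketch above the diff. */
    int main(void)
    {
    	/* p = 2^255 - 19 as four little-endian limbs. */
    	const uint64_t p[4] = { 0xffffffffffffffedULL, 0xffffffffffffffffULL,
    				0xffffffffffffffffULL, 0x7fffffffffffffffULL };
    	uint64_t c[4];

    	/* p itself reduces to 0. */
    	memcpy(c, p, sizeof(c));
    	fred_ref(c);
    	assert(c[0] == 0 && c[1] == 0 && c[2] == 0 && c[3] == 0);

    	/* p - 1 is already fully reduced and stays unchanged. */
    	memcpy(c, p, sizeof(c));
    	c[0] -= 1;
    	fred_ref(c);
    	assert(c[0] == p[0] - 1 && c[1] == p[1] && c[2] == p[2] && c[3] == p[3]);

    	/* 2^256 - 1 = 2*(2^255 - 19) + 37, so it reduces to 37. */
    	memset(c, 0xff, sizeof(c));
    	fred_ref(c);
    	assert(c[0] == 37 && c[1] == 0 && c[2] == 0 && c[3] == 0);

    	puts("all edge cases pass");
    	return 0;
    }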