diff options
author | Samuel Neves <sneves@dei.uc.pt> | 2018-07-28 07:23:07 +0100 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2018-07-28 14:51:19 +0200 |
commit | 981a0bd665f2eb5b0eb7625969ea588c3c67ed20 (patch) | |
tree | 42e64b94f2b733244a371d4de730766898abee55 /src/crypto/curve25519-x86_64.h | |
parent | 06bab82a51b769ca96297d09ed96afcbcfb36189 (diff) |
curve25519-x86_64: tighten reductions modulo 2^256-38
At this stage the value of C[4] is at most floor(((2^256-1) + 38*(2^256-1)) / 2^256) = 38,
so there is no need to use a wide multiplication.
Change inspired by Andy Polyakov's OpenSSL implementation.
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'src/crypto/curve25519-x86_64.h')
-rw-r--r-- | src/crypto/curve25519-x86_64.h | 39 |
1 files changed, 18 insertions, 21 deletions
diff --git a/src/crypto/curve25519-x86_64.h b/src/crypto/curve25519-x86_64.h index 29204de..b1c3766 100644 --- a/src/crypto/curve25519-x86_64.h +++ b/src/crypto/curve25519-x86_64.h @@ -909,10 +909,9 @@ static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a) /***************************************/ "adcx %%rbx, %%rcx ;" "adox %%rbx, %%rcx ;" - "clc ;" - "mulx %%rcx, %%rax, %%rcx ; " /* c*C[4] */ - "adcx %%rax, %%r8 ;" - "adcx %%rcx, %%r9 ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ + "adcx %%rcx, %%r8 ;" + "adcx %%rbx, %%r9 ;" "movq %%r9, 8(%0) ;" "adcx %%rbx, %%r10 ;" "movq %%r10, 16(%0) ;" @@ -938,10 +937,9 @@ static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a) /****************************************/ "adcx %%rbx, %%rcx ;" "adox %%rbx, %%rcx ;" - "clc ;" - "mulx %%rcx, %%rax, %%rcx ; " /* c*C[4] */ - "adcx %%rax, %%r8 ;" - "adcx %%rcx, %%r9 ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ + "adcx %%rcx, %%r8 ;" + "adcx %%rbx, %%r9 ;" "movq %%r9, 40(%0) ;" "adcx %%rbx, %%r10 ;" "movq %%r10, 48(%0) ;" @@ -974,9 +972,9 @@ static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a) "adcq 16(%1), %%r10 ;" "adcq 24(%1), %%r11 ;" "adcq $0, %%rcx ;" - "mulx %%rcx, %%rax, %%rcx ;" /* c*C[4] */ - "addq %%rax, %%r8 ;" - "adcq %%rcx, %%r9 ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" "movq %%r9, 8(%0) ;" "adcq $0, %%r10 ;" "movq %%r10, 16(%0) ;" @@ -1001,9 +999,9 @@ static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a) "adcq 80(%1), %%r10 ;" "adcq 88(%1), %%r11 ;" "adcq $0, %%rcx ;" - "mulx %%rcx, %%rax, %%rcx ;" /* c*C[4] */ - "addq %%rax, %%r8 ;" - "adcq %%rcx, %%r9 ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" "movq %%r9, 40(%0) ;" "adcq $0, %%r10 ;" "movq %%r10, 48(%0) ;" @@ -1333,10 +1331,9 @@ static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a) /***************************************/ "adcx %%rbx, %%rcx ;" 
"adox %%rbx, %%rcx ;" - "clc ;" - "mulx %%rcx, %%rax, %%rcx ;" /* c*C[4] */ - "adcx %%rax, %%r8 ;" - "adcx %%rcx, %%r9 ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ + "adcx %%rcx, %%r8 ;" + "adcx %%rbx, %%r9 ;" "movq %%r9, 8(%0) ;" "adcx %%rbx, %%r10 ;" "movq %%r10, 16(%0) ;" @@ -1369,9 +1366,9 @@ static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) "adcq 16(%1), %%r10 ;" "adcq 24(%1), %%r11 ;" "adcq $0, %%rcx ;" - "mulx %%rcx, %%rax, %%rcx ;" /* c*C[4] */ - "addq %%rax, %%r8 ;" - "adcq %%rcx, %%r9 ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" "movq %%r9, 8(%0) ;" "adcq $0, %%r10 ;" "movq %%r10, 16(%0) ;" |