[PATCH 5/8] mpi/ec: small optimization for ec_mulm_25519

Jussi Kivilinna jussi.kivilinna at iki.fi
Mon Apr 26 23:00:43 CEST 2021


* mpi/ec.c (ec_addm_25519): Remove one addition and the need to
clear 'n'.
(ec_subm_25519): Change the order of add_n and set_cond to remove the
need to clear 'n'.
(ec_mulm_25519): Avoid extra memory copies; use _gcry_mpih_addmul_1
for multiplying by 19 and adding; remove one addition at the end.
--

Benchmarks on AMD Ryzen 7 5800X:

Before:
 Ed25519        |  nanosecs/iter   cycles/iter  auto Mhz
         keygen |         304980       1478913      4849
           sign |         328657       1589657      4837
         verify |         625133       3032355      4851

After (~22% faster):
 Ed25519        |  nanosecs/iter   cycles/iter  auto Mhz
         keygen |         244288       1184862      4850
           sign |         267831       1298934      4850
         verify |         504745       2449106      4852

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 mpi/ec.c | 41 ++++++++++++-----------------------------
 1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/mpi/ec.c b/mpi/ec.c
index 0b6ae9a9..e1d4b32c 100644
--- a/mpi/ec.c
+++ b/mpi/ec.c
@@ -369,15 +369,13 @@ ec_addm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
   if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
     log_bug ("addm_25519: different sizes\n");
 
-  memset (n, 0, sizeof n);
   up = u->d;
   vp = v->d;
   wp = w->d;
 
   _gcry_mpih_add_n (wp, up, vp, wsize);
-  borrow = _gcry_mpih_sub_n (wp, wp, ctx->p->d, wsize);
-  mpih_set_cond (n, ctx->p->d, wsize, (borrow != 0UL));
-  _gcry_mpih_add_n (wp, wp, n, wsize);
+  borrow = _gcry_mpih_sub_n (n, wp, ctx->p->d, wsize);
+  mpih_set_cond (wp, n, wsize, (borrow == 0UL));
   wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
 }
 
@@ -392,14 +390,13 @@ ec_subm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
   if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
     log_bug ("subm_25519: different sizes\n");
 
-  memset (n, 0, sizeof n);
   up = u->d;
   vp = v->d;
   wp = w->d;
 
   borrow = _gcry_mpih_sub_n (wp, up, vp, wsize);
-  mpih_set_cond (n, ctx->p->d, wsize, (borrow != 0UL));
-  _gcry_mpih_add_n (wp, wp, n, wsize);
+  _gcry_mpih_add_n (n, wp, ctx->p->d, wsize);
+  mpih_set_cond (wp, n, wsize, (borrow != 0UL));
   wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
 }
 
@@ -409,7 +406,6 @@ ec_mulm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
   mpi_ptr_t wp, up, vp;
   mpi_size_t wsize = LIMB_SIZE_25519;
   mpi_limb_t n[LIMB_SIZE_25519*2];
-  mpi_limb_t m[LIMB_SIZE_25519+1];
   mpi_limb_t cy;
   int msb;
 
@@ -425,32 +421,19 @@ ec_mulm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
   memcpy (wp, n, wsize * BYTES_PER_MPI_LIMB);
   wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
 
-  memcpy (m, n+LIMB_SIZE_25519-1, (wsize+1) * BYTES_PER_MPI_LIMB);
-  _gcry_mpih_rshift (m, m, LIMB_SIZE_25519+1, (255 % BITS_PER_MPI_LIMB));
-
-  memcpy (n, m, wsize * BYTES_PER_MPI_LIMB);
-  cy = _gcry_mpih_lshift (m, m, LIMB_SIZE_25519, 4);
-  m[LIMB_SIZE_25519] = cy;
-  cy = _gcry_mpih_add_n (m, m, n, wsize);
-  m[LIMB_SIZE_25519] += cy;
-  cy = _gcry_mpih_add_n (m, m, n, wsize);
-  m[LIMB_SIZE_25519] += cy;
-  cy = _gcry_mpih_add_n (m, m, n, wsize);
-  m[LIMB_SIZE_25519] += cy;
+  _gcry_mpih_rshift (n, n+LIMB_SIZE_25519-1, LIMB_SIZE_25519+1,
+		     (255 % BITS_PER_MPI_LIMB));
 
-  cy = _gcry_mpih_add_n (wp, wp, m, wsize);
-  m[LIMB_SIZE_25519] += cy;
+  cy = _gcry_mpih_addmul_1 (wp, n, wsize, 19);
 
-  memset (m, 0, wsize * BYTES_PER_MPI_LIMB);
+  memset (n, 0, wsize * BYTES_PER_MPI_LIMB);
   msb = (wp[LIMB_SIZE_25519-1] >> (255 % BITS_PER_MPI_LIMB));
-  m[0] = (m[LIMB_SIZE_25519] * 2 + msb) * 19;
+  n[0] = (cy * 2 + msb) * 19;
   wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
-  _gcry_mpih_add_n (wp, wp, m, wsize);
+  _gcry_mpih_add_n (wp, wp, n, wsize);
 
-  m[0] = 0;
-  cy = _gcry_mpih_sub_n (wp, wp, ctx->p->d, wsize);
-  mpih_set_cond (m, ctx->p->d, wsize, (cy != 0UL));
-  _gcry_mpih_add_n (wp, wp, m, wsize);
+  cy = _gcry_mpih_sub_n (n, wp, ctx->p->d, wsize);
+  mpih_set_cond (wp, n, wsize, (cy == 0UL));
 }
 
 static void
-- 
2.30.2




More information about the Gcrypt-devel mailing list