From jussi.kivilinna at iki.fi  Fri Oct  8 18:50:44 2021
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Fri,  8 Oct 2021 19:50:44 +0300
Subject: [PATCH] cipher/sha256: fix 'accessing 32 bytes in a region of size 4'
 warnings
Message-ID: <20211008165044.2148150-1-jussi.kivilinna@iki.fi>

* cipher/sha256.c (SHA256_CONTEXT): Replace h0-h7 with h[8].
(do_sha256_transform_amd64_ssse3, do_sha256_transform_amd64_avx)
(do_sha256_transform_amd64_avx2, do_sha256_transform_intel_shaext)
(do_sha256_transform_armv8_ce, do_sha256_transform_ppc8)
(do_sha256_transform_ppc9, do_sha256_transform_s390x)
(do_sha256_final_s390x, sha256_init, sha224_init)
(do_transform_generic, sha256_final): Convert use of h0-h7 to h[0]-h[7].
--

GCC-11 gives the following warning on sha256.c:
 warning: 'func' accessing 32 bytes in a region of size 4

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/sha256.c | 92 ++++++++++++++++++++++++-------------------------
 1 file changed, 46 insertions(+), 46 deletions(-)

diff --git a/cipher/sha256.c b/cipher/sha256.c
index 7b2c78f7..74f7fdf0 100644
--- a/cipher/sha256.c
+++ b/cipher/sha256.c
@@ -119,9 +119,9 @@
 
 typedef struct {
   gcry_md_block_ctx_t bctx;
-  u32  h0,h1,h2,h3,h4,h5,h6,h7;
+  u32  h[8];
 #ifdef USE_S390X_CRYPTO
-  u32  final_len_msb, final_len_lsb; /* needs to be right after h7. */
+  u32  final_len_msb, final_len_lsb; /* needs to be right after h[7]. */
   int  use_s390x_crypto;
 #endif
 } SHA256_CONTEXT;
@@ -153,7 +153,7 @@ do_sha256_transform_amd64_ssse3(void *ctx, const unsigned char *data,
                                 size_t nblks)
 {
   SHA256_CONTEXT *hd = ctx;
-  return _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, nblks)
+  return _gcry_sha256_transform_amd64_ssse3 (data, hd->h, nblks)
          + ASM_EXTRA_STACK;
 }
 #endif
@@ -168,7 +168,7 @@ do_sha256_transform_amd64_avx(void *ctx, const unsigned char *data,
                               size_t nblks)
 {
   SHA256_CONTEXT *hd = ctx;
-  return _gcry_sha256_transform_amd64_avx (data, &hd->h0, nblks)
+  return _gcry_sha256_transform_amd64_avx (data, hd->h, nblks)
          + ASM_EXTRA_STACK;
 }
 #endif
@@ -183,7 +183,7 @@ do_sha256_transform_amd64_avx2(void *ctx, const unsigned char *data,
                                size_t nblks)
 {
   SHA256_CONTEXT *hd = ctx;
-  return _gcry_sha256_transform_amd64_avx2 (data, &hd->h0, nblks)
+  return _gcry_sha256_transform_amd64_avx2 (data, hd->h, nblks)
          + ASM_EXTRA_STACK;
 }
 #endif
@@ -200,7 +200,7 @@ do_sha256_transform_intel_shaext(void *ctx, const unsigned char *data,
                                  size_t nblks)
 {
   SHA256_CONTEXT *hd = ctx;
-  return _gcry_sha256_transform_intel_shaext (&hd->h0, data, nblks);
+  return _gcry_sha256_transform_intel_shaext (hd->h, data, nblks);
 }
 #endif
 
@@ -214,7 +214,7 @@ do_sha256_transform_armv8_ce(void *ctx, const unsigned char *data,
                              size_t nblks)
 {
   SHA256_CONTEXT *hd = ctx;
-  return _gcry_sha256_transform_armv8_ce (&hd->h0, data, nblks);
+  return _gcry_sha256_transform_armv8_ce (hd->h, data, nblks);
 }
 #endif
 
@@ -231,14 +231,14 @@ static unsigned int
 do_sha256_transform_ppc8(void *ctx, const unsigned char *data, size_t nblks)
 {
   SHA256_CONTEXT *hd = ctx;
-  return _gcry_sha256_transform_ppc8 (&hd->h0, data, nblks);
+  return _gcry_sha256_transform_ppc8 (hd->h, data, nblks);
 }
 
 static unsigned int
 do_sha256_transform_ppc9(void *ctx, const unsigned char *data, size_t nblks)
 {
   SHA256_CONTEXT *hd = ctx;
-  return _gcry_sha256_transform_ppc9 (&hd->h0, data, nblks);
+  return _gcry_sha256_transform_ppc9 (hd->h, data, nblks);
 }
 #endif
 
@@ -250,7 +250,7 @@ do_sha256_transform_s390x (void *ctx, const unsigned char *data, size_t nblks)
 {
   SHA256_CONTEXT *hd = ctx;
 
-  kimd_execute (KMID_FUNCTION_SHA256, &hd->h0, data, nblks * 64);
+  kimd_execute (KMID_FUNCTION_SHA256, hd->h, data, nblks * 64);
   return 0;
 }
 
@@ -261,18 +261,18 @@ do_sha256_final_s390x (void *ctx, const unsigned char *data, size_t datalen,
   SHA256_CONTEXT *hd = ctx;
 
   /* Make sure that 'final_len' is positioned at correct offset relative
-   * to 'h0'. This is because we are passing 'h0' pointer as start of
+   * to 'h[0]'. This is because we are passing 'h[0]' pointer as start of
    * parameter block to 'klmd' instruction. */
 
   gcry_assert (offsetof (SHA256_CONTEXT, final_len_msb)
-	       - offsetof (SHA256_CONTEXT, h0) == 8 * sizeof(u32));
+	       - offsetof (SHA256_CONTEXT, h[0]) == 8 * sizeof(u32));
   gcry_assert (offsetof (SHA256_CONTEXT, final_len_lsb)
 	       - offsetof (SHA256_CONTEXT, final_len_msb) == 1 * sizeof(u32));
 
   hd->final_len_msb = len_msb;
   hd->final_len_lsb = len_lsb;
 
-  klmd_execute (KMID_FUNCTION_SHA256, &hd->h0, data, datalen);
+  klmd_execute (KMID_FUNCTION_SHA256, hd->h, data, datalen);
   return 0;
 }
 #endif
@@ -347,14 +347,14 @@ sha256_init (void *context, unsigned int flags)
 
   (void)flags;
 
-  hd->h0 = 0x6a09e667;
-  hd->h1 = 0xbb67ae85;
-  hd->h2 = 0x3c6ef372;
-  hd->h3 = 0xa54ff53a;
-  hd->h4 = 0x510e527f;
-  hd->h5 = 0x9b05688c;
-  hd->h6 = 0x1f83d9ab;
-  hd->h7 = 0x5be0cd19;
+  hd->h[0] = 0x6a09e667;
+  hd->h[1] = 0xbb67ae85;
+  hd->h[2] = 0x3c6ef372;
+  hd->h[3] = 0xa54ff53a;
+  hd->h[4] = 0x510e527f;
+  hd->h[5] = 0x9b05688c;
+  hd->h[6] = 0x1f83d9ab;
+  hd->h[7] = 0x5be0cd19;
 
   sha256_common_init (hd);
 }
@@ -367,14 +367,14 @@ sha224_init (void *context, unsigned int flags)
 
   (void)flags;
 
-  hd->h0 = 0xc1059ed8;
-  hd->h1 = 0x367cd507;
-  hd->h2 = 0x3070dd17;
-  hd->h3 = 0xf70e5939;
-  hd->h4 = 0xffc00b31;
-  hd->h5 = 0x68581511;
-  hd->h6 = 0x64f98fa7;
-  hd->h7 = 0xbefa4fa4;
+  hd->h[0] = 0xc1059ed8;
+  hd->h[1] = 0x367cd507;
+  hd->h[2] = 0x3070dd17;
+  hd->h[3] = 0xf70e5939;
+  hd->h[4] = 0xffc00b31;
+  hd->h[5] = 0x68581511;
+  hd->h[6] = 0x64f98fa7;
+  hd->h[7] = 0xbefa4fa4;
 
   sha256_common_init (hd);
 }
@@ -441,14 +441,14 @@ do_transform_generic (void *ctx, const unsigned char *data, size_t nblks)
       u32 a,b,c,d,e,f,g,h,t1,t2;
       u32 w[16];
 
-      a = hd->h0;
-      b = hd->h1;
-      c = hd->h2;
-      d = hd->h3;
-      e = hd->h4;
-      f = hd->h5;
-      g = hd->h6;
-      h = hd->h7;
+      a = hd->h[0];
+      b = hd->h[1];
+      c = hd->h[2];
+      d = hd->h[3];
+      e = hd->h[4];
+      f = hd->h[5];
+      g = hd->h[6];
+      h = hd->h[7];
 
       R(a, b, c, d, e, f, g, h, K[0], I(0));
       R(h, a, b, c, d, e, f, g, K[1], I(1));
@@ -518,14 +518,14 @@ do_transform_generic (void *ctx, const unsigned char *data, size_t nblks)
       R(c, d, e, f, g, h, a, b, K[62], W(62));
       R(b, c, d, e, f, g, h, a, K[63], W(63));
 
-      hd->h0 += a;
-      hd->h1 += b;
-      hd->h2 += c;
-      hd->h3 += d;
-      hd->h4 += e;
-      hd->h5 += f;
-      hd->h6 += g;
-      hd->h7 += h;
+      hd->h[0] += a;
+      hd->h[1] += b;
+      hd->h[2] += c;
+      hd->h[3] += d;
+      hd->h[4] += e;
+      hd->h[5] += f;
+      hd->h[6] += g;
+      hd->h[7] += h;
 
       data += 64;
     }
@@ -603,7 +603,7 @@ sha256_final(void *context)
     }
 
   p = hd->bctx.buf;
-#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0)
+#define X(a) do { buf_put_be32(p, hd->h[a]); p += 4; } while(0)
   X(0);
   X(1);
   X(2);
-- 
2.30.2


From gniibe at fsij.org  Fri Oct 15 10:09:54 2021
From: gniibe at fsij.org (NIIBE Yutaka)
Date: Fri, 15 Oct 2021 17:09:54 +0900
Subject: Argon2
Message-ID: <87v91yiu99.fsf@akagi.fsij.org>

Hello,

Jussi, if you have any idea about implementing Argon2 in libgcrypt,
please let us know.

Let me explain my background.

Use of Argon2 in OpenPGP messages has been discussed a few times.

   - 2015-10-18 about Argon2i:
   https://mailarchive.ietf.org/arch/browse/openpgp/?gbt=1&index=IORjkQR17EURj9HQaKCqoQ2TKkI

   - These days: Proposed change including S2K with Argon2id:
   https://gitlab.com/openpgp-wg/rfc4880bis/-/merge_requests/72/diffs

For GnuPG, it's not that important, I suppose.  We probably won't use it.
At most, it would be (only) for interoperability with other
OpenPGP implementations when exchanging secret keys.

For libgcrypt, on the other hand, I think that it would be good if we,
as a crypto library, supported Argon2.  It would be good if the KDF
supported Argon2.


So, I wonder how we should add Argon2 to libgcrypt.

Now, I looked at this code:

   https://github.com/P-H-C/phc-winner-argon2
   CC0 or Apache Public License 2.0

Any ideas?
-- 


From jussi.kivilinna at iki.fi  Wed Oct 20 18:49:35 2021
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Wed, 20 Oct 2021 19:49:35 +0300
Subject: Argon2
In-Reply-To: <87v91yiu99.fsf@akagi.fsij.org>
References: <87v91yiu99.fsf@akagi.fsij.org>
Message-ID: <584e3784-609d-b56a-1a00-ed8f244e0b42@iki.fi>

Hello,

On 15.10.2021 11.09, NIIBE Yutaka wrote:
> Hello,
> 
> Jussi, if you have any idea about implementing Argon2 in libgcrypt,
> please let us know.
> 
> Let me explain the background of mine.
> 
> Use of Argon2 in OpenPGP message were discussed sometimes.
> 
>     - 2015-10-18 about Argon2i:
>     https://mailarchive.ietf.org/arch/browse/openpgp/?gbt=1&index=IORjkQR17EURj9HQaKCqoQ2TKkI
> 
>     - These days: Proposed change including S2K with Argon2id:
>     https://gitlab.com/openpgp-wg/rfc4880bis/-/merge_requests/72/diffs
> 
> For GnuPG, it's not that important, I suppose.  Probably, we won't use.
> Perhaps, at maximum, it will be (only) for interoperability to other
> OpenPGP implementation exchanging secret keys.
> 
> For libgcrypt, on the other hand, I think that it's good if we support
> Argon2 as crypto library.  When KDF supports Argon2, it's good.
> 

Yes, it would be nice if libgcrypt had Argon2 support.

> 
> So, I wonder about how we add Argon2 into libgcrypt.
> 
> Now, I looked at this code:
> 
>     https://github.com/P-H-C/phc-winner-argon2
>     CC0 or Apache Public License 2.0
> 
> Any ideas?
> 

We could use that repo and RFC 9106 as a starting point and select the
single-thread code path (core.c, ARGON2_NO_THREADS) and the reference
fill_blocks (src/ref.c) for the initial implementation. Vector instruction set
optimizations and multi-threading could be added later (the latter would need
threading support in gpgrt, I think).

One thing I'm not sure about is how to introduce Argon2 to the KDF interface.
Argon2 has more parameters (degree of parallelism p, memory size m, number of
passes t) than what libgcrypt takes with `gcry_kdf_derive()`, which only takes
`iterations`. So we'd need a new interface for Argon2.

-Jussi


From wk at gnupg.org  Fri Oct 22 18:49:56 2021
From: wk at gnupg.org (Werner Koch)
Date: Fri, 22 Oct 2021 18:49:56 +0200
Subject: Argon2
In-Reply-To: <584e3784-609d-b56a-1a00-ed8f244e0b42@iki.fi> (Jussi Kivilinna's
 message of "Wed, 20 Oct 2021 19:49:35 +0300")
References: <87v91yiu99.fsf@akagi.fsij.org>
 <584e3784-609d-b56a-1a00-ed8f244e0b42@iki.fi>
Message-ID: <87v91pngwb.fsf@wheatstone.g10code.de>

On Wed, 20 Oct 2021 19:49, Jussi Kivilinna said:

> One thing I'm not sure about is how to introduce Argon2 to KDF interface.
> Argon2 has more parameters (degree of parallelism p, memory size m, number of
> passes t) than what libgcrypt takes with `gcry_kdf_derive()`, which only takes
> `iterations`. So we'd need new interface for Argon2.

Well, we may make use of the SUBALGO and put several parameters into
ITERATIONS.  This would allow us to add just a couple of macros and
save us the trouble of adding a new interface.


Shalom-Salam,

   Werner

-- 
Die Gedanken sind frei.  Ausnahmen regelt ein Bundesgesetz.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 227 bytes
Desc: not available
URL: <https://lists.gnupg.org/pipermail/gcrypt-devel/attachments/20211022/8411d2ba/attachment.sig>

From jussi.kivilinna at iki.fi  Mon Oct 25 18:51:13 2021
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Mon, 25 Oct 2021 19:51:13 +0300
Subject: Argon2
In-Reply-To: <87v91pngwb.fsf@wheatstone.g10code.de>
References: <87v91yiu99.fsf@akagi.fsij.org>
 <584e3784-609d-b56a-1a00-ed8f244e0b42@iki.fi>
 <87v91pngwb.fsf@wheatstone.g10code.de>
Message-ID: <e251666b-a83f-9562-7f60-d57ae737a53d@iki.fi>

On 22.10.2021 19.49, Werner Koch wrote:
> On Wed, 20 Oct 2021 19:49, Jussi Kivilinna said:
> 
>> One thing I'm not sure about is how to introduce Argon2 to KDF interface.
>> Argon2 has more parameters (degree of parallelism p, memory size m, number of
>> passes t) than what libgcrypt takes with `gcry_kdf_derive()`, which only takes
>> `iterations`. So we'd need new interface for Argon2.
> 
> Well, we may make use of the SUBALGO and put several parameters into
> ITERATIONS.  This would allow us to add just a couple of macros and
> save the trouble of adding a new interface.
> 

The RFC defines two 32-bit parameters (m and t) and one 24-bit parameter (p).
We would need to use reduced value ranges for these if they were passed
through ITERATIONS. Large values probably won't have practical use, but it
would be nice to support the full value ranges. Maybe KEYBUFFER could be used
for passing a pointer to an Argon2 parameter structure?

-Jussi

> 
> Shalom-Salam,
> 
>     Werner
> 


From jussi.kivilinna at iki.fi  Mon Oct 25 19:55:20 2021
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Mon, 25 Oct 2021 20:55:20 +0300
Subject: [PATCH] poly1305: fix building with 'arm-linux-gnueabihf-gcc-11 -O3'
Message-ID: <20211025175520.1968630-1-jussi.kivilinna@iki.fi>

* cipher/poly1305.c [HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS]
(ADD_1305_32): Reduce number of register operands.
--

Ubuntu 21.10 arm-linux-gnueabihf-gcc gave the following error with -O3:

poly1305.c: In function '_gcry_poly1305_update_burn':
cipher/poly1305.c:425:7: error: 'asm' operand has impossible constraints
  425 |       ADD_1305_32(h4, h3, h2, h1, h0, m4, m3, m2, m1, m0);
      |       ^

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/poly1305.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/cipher/poly1305.c b/cipher/poly1305.c
index 41e55e8d..e57e64f3 100644
--- a/cipher/poly1305.c
+++ b/cipher/poly1305.c
@@ -298,15 +298,37 @@ static unsigned int poly1305_final (poly1305_context_t *ctx,
 	       : "0" (HI), "1" (LO), "r" (A), "r" (B) )
 
 /* A += B (arm) */
-#define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+#  define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \
+      u32 __carry; \
       __asm__ ("adds %0, %0, %5\n" \
 	       "adcs %1, %1, %6\n" \
 	       "adcs %2, %2, %7\n" \
 	       "adcs %3, %3, %8\n" \
-	       "adc %4, %4, %9\n" \
-	       : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \
-	       : "r" (B0), "r" (B1), "r" (B2), "r" (B3), "r" (B4) \
-	       : "cc" )
+	       : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), \
+	         "=@cccs" (__carry) \
+	       : "r" (B0), "r" (B1), "r" (B2), "r" (B3) \
+	       : ); \
+      (A4) += (B4) + __carry; \
+    } while (0)
+#else
+#  define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \
+      u32 __carry = (B0); \
+      __asm__ ("adds %0, %0, %2\n" \
+	       "adcs %1, %1, %3\n" \
+	       "rrx %2, %2\n" /* carry to 31th bit */ \
+	       : "+r" (A0), "+r" (A1), "+r" (__carry) \
+	       : "r" (B1), "r" (0) \
+	       : "cc" ); \
+      __asm__ ("lsls %0, %0, #1\n" /* carry from 31th bit */ \
+	       "adcs %1, %1, %4\n" \
+	       "adcs %2, %2, %5\n" \
+	       "adc  %3, %3, %6\n" \
+	       : "+r" (__carry), "+r" (A2), "+r" (A3), "+r" (A4) \
+	       : "r" (B2), "r" (B3), "r" (B4) \
+	       : "cc" ); \
+    } while (0)
+#endif
 
 #endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */
 
-- 
2.32.0


From jussi.kivilinna at iki.fi  Mon Oct 25 20:02:12 2021
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Mon, 25 Oct 2021 21:02:12 +0300
Subject: [PATCH 1/3] cipher/sha512: fix 'accessing 64 bytes in a region of
 size 8' warnings
Message-ID: <20211025180214.1970358-1-jussi.kivilinna@iki.fi>

* cipher/sha512.c (SHA512_STATE): Replace h0-h7 with h[8].
(do_sha512_transform_i386_ssse3, do_sha512_transform_ppc8)
(do_sha512_transform_ppc9, do_sha512_transform_s390x)
(do_sha512_final_s390x, sha512_init, sha384_init, sha512_256_init)
(sha512_224_init, do_transform_generic, sha512_final): Convert use
of h0-h7 to h[0]-h[7].
--

GCC-11 gives the following warning on sha512.c:
 warning: 'func' accessing 64 bytes in a region of size 8

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/sha512.c | 118 ++++++++++++++++++++++++------------------------
 1 file changed, 59 insertions(+), 59 deletions(-)

diff --git a/cipher/sha512.c b/cipher/sha512.c
index 45291567..2d4f58e0 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -134,7 +134,7 @@
 
 typedef struct
 {
-  u64 h0, h1, h2, h3, h4, h5, h6, h7;
+  u64 h[8];
 } SHA512_STATE;
 
 typedef struct
@@ -142,7 +142,7 @@ typedef struct
   gcry_md_block_ctx_t bctx;
   SHA512_STATE state;
 #ifdef USE_S390X_CRYPTO
-  u64 final_len_msb, final_len_lsb; /* needs to be right after state.h7. */
+  u64 final_len_msb, final_len_lsb; /* needs to be right after state.h[7]. */
   int use_s390x_crypto;
 #endif
 } SHA512_CONTEXT;
@@ -277,7 +277,7 @@ do_sha512_transform_i386_ssse3(void *ctx, const unsigned char *data,
 			       size_t nblks)
 {
   SHA512_CONTEXT *hd = ctx;
-  return _gcry_sha512_transform_i386_ssse3 (&hd->state.h0, data, nblks);
+  return _gcry_sha512_transform_i386_ssse3 (hd->state.h, data, nblks);
 }
 #endif
 
@@ -312,14 +312,14 @@ static unsigned int
 do_sha512_transform_ppc8(void *ctx, const unsigned char *data, size_t nblks)
 {
   SHA512_CONTEXT *hd = ctx;
-  return _gcry_sha512_transform_ppc8 (&hd->state.h0, data, nblks);
+  return _gcry_sha512_transform_ppc8 (hd->state.h, data, nblks);
 }
 
 static unsigned int
 do_sha512_transform_ppc9(void *ctx, const unsigned char *data, size_t nblks)
 {
   SHA512_CONTEXT *hd = ctx;
-  return _gcry_sha512_transform_ppc9 (&hd->state.h0, data, nblks);
+  return _gcry_sha512_transform_ppc9 (hd->state.h, data, nblks);
 }
 #endif
 
@@ -332,7 +332,7 @@ do_sha512_transform_s390x (void *ctx, const unsigned char *data, size_t nblks)
 {
   SHA512_CONTEXT *hd = ctx;
 
-  kimd_execute (KMID_FUNCTION_SHA512, &hd->state.h0, data, nblks * 128);
+  kimd_execute (KMID_FUNCTION_SHA512, hd->state.h, data, nblks * 128);
   return 0;
 }
 
@@ -343,18 +343,18 @@ do_sha512_final_s390x (void *ctx, const unsigned char *data, size_t datalen,
   SHA512_CONTEXT *hd = ctx;
 
   /* Make sure that 'final_len' is positioned at correct offset relative
-   * to 'state.h0'. This is because we are passing 'state.h0' pointer as start of
-   * parameter block to 'klmd' instruction. */
+   * to 'state.h[0]'. This is because we are passing 'state.h[0]' pointer as
+   * start of parameter block to 'klmd' instruction. */
 
   gcry_assert (offsetof (SHA512_CONTEXT, final_len_msb)
-	       - offsetof (SHA512_CONTEXT, state.h0) == 8 * sizeof(u64));
+	       - offsetof (SHA512_CONTEXT, state.h[0]) == 8 * sizeof(u64));
   gcry_assert (offsetof (SHA512_CONTEXT, final_len_lsb)
 	       - offsetof (SHA512_CONTEXT, final_len_msb) == 1 * sizeof(u64));
 
   hd->final_len_msb = len_msb;
   hd->final_len_lsb = len_lsb;
 
-  klmd_execute (KMID_FUNCTION_SHA512, &hd->state.h0, data, datalen);
+  klmd_execute (KMID_FUNCTION_SHA512, hd->state.h, data, datalen);
   return 0;
 }
 #endif
@@ -425,14 +425,14 @@ sha512_init (void *context, unsigned int flags)
   SHA512_CONTEXT *ctx = context;
   SHA512_STATE *hd = &ctx->state;
 
-  hd->h0 = U64_C(0x6a09e667f3bcc908);
-  hd->h1 = U64_C(0xbb67ae8584caa73b);
-  hd->h2 = U64_C(0x3c6ef372fe94f82b);
-  hd->h3 = U64_C(0xa54ff53a5f1d36f1);
-  hd->h4 = U64_C(0x510e527fade682d1);
-  hd->h5 = U64_C(0x9b05688c2b3e6c1f);
-  hd->h6 = U64_C(0x1f83d9abfb41bd6b);
-  hd->h7 = U64_C(0x5be0cd19137e2179);
+  hd->h[0] = U64_C(0x6a09e667f3bcc908);
+  hd->h[1] = U64_C(0xbb67ae8584caa73b);
+  hd->h[2] = U64_C(0x3c6ef372fe94f82b);
+  hd->h[3] = U64_C(0xa54ff53a5f1d36f1);
+  hd->h[4] = U64_C(0x510e527fade682d1);
+  hd->h[5] = U64_C(0x9b05688c2b3e6c1f);
+  hd->h[6] = U64_C(0x1f83d9abfb41bd6b);
+  hd->h[7] = U64_C(0x5be0cd19137e2179);
 
   sha512_init_common (ctx, flags);
 }
@@ -443,14 +443,14 @@ sha384_init (void *context, unsigned int flags)
   SHA512_CONTEXT *ctx = context;
   SHA512_STATE *hd = &ctx->state;
 
-  hd->h0 = U64_C(0xcbbb9d5dc1059ed8);
-  hd->h1 = U64_C(0x629a292a367cd507);
-  hd->h2 = U64_C(0x9159015a3070dd17);
-  hd->h3 = U64_C(0x152fecd8f70e5939);
-  hd->h4 = U64_C(0x67332667ffc00b31);
-  hd->h5 = U64_C(0x8eb44a8768581511);
-  hd->h6 = U64_C(0xdb0c2e0d64f98fa7);
-  hd->h7 = U64_C(0x47b5481dbefa4fa4);
+  hd->h[0] = U64_C(0xcbbb9d5dc1059ed8);
+  hd->h[1] = U64_C(0x629a292a367cd507);
+  hd->h[2] = U64_C(0x9159015a3070dd17);
+  hd->h[3] = U64_C(0x152fecd8f70e5939);
+  hd->h[4] = U64_C(0x67332667ffc00b31);
+  hd->h[5] = U64_C(0x8eb44a8768581511);
+  hd->h[6] = U64_C(0xdb0c2e0d64f98fa7);
+  hd->h[7] = U64_C(0x47b5481dbefa4fa4);
 
   sha512_init_common (ctx, flags);
 }
@@ -462,14 +462,14 @@ sha512_256_init (void *context, unsigned int flags)
   SHA512_CONTEXT *ctx = context;
   SHA512_STATE *hd = &ctx->state;
 
-  hd->h0 = U64_C(0x22312194fc2bf72c);
-  hd->h1 = U64_C(0x9f555fa3c84c64c2);
-  hd->h2 = U64_C(0x2393b86b6f53b151);
-  hd->h3 = U64_C(0x963877195940eabd);
-  hd->h4 = U64_C(0x96283ee2a88effe3);
-  hd->h5 = U64_C(0xbe5e1e2553863992);
-  hd->h6 = U64_C(0x2b0199fc2c85b8aa);
-  hd->h7 = U64_C(0x0eb72ddc81c52ca2);
+  hd->h[0] = U64_C(0x22312194fc2bf72c);
+  hd->h[1] = U64_C(0x9f555fa3c84c64c2);
+  hd->h[2] = U64_C(0x2393b86b6f53b151);
+  hd->h[3] = U64_C(0x963877195940eabd);
+  hd->h[4] = U64_C(0x96283ee2a88effe3);
+  hd->h[5] = U64_C(0xbe5e1e2553863992);
+  hd->h[6] = U64_C(0x2b0199fc2c85b8aa);
+  hd->h[7] = U64_C(0x0eb72ddc81c52ca2);
 
   sha512_init_common (ctx, flags);
 }
@@ -481,14 +481,14 @@ sha512_224_init (void *context, unsigned int flags)
   SHA512_CONTEXT *ctx = context;
   SHA512_STATE *hd = &ctx->state;
 
-  hd->h0 = U64_C(0x8c3d37c819544da2);
-  hd->h1 = U64_C(0x73e1996689dcd4d6);
-  hd->h2 = U64_C(0x1dfab7ae32ff9c82);
-  hd->h3 = U64_C(0x679dd514582f9fcf);
-  hd->h4 = U64_C(0x0f6d2b697bd44da8);
-  hd->h5 = U64_C(0x77e36f7304c48942);
-  hd->h6 = U64_C(0x3f9d85a86a1d36c8);
-  hd->h7 = U64_C(0x1112e6ad91d692a1);
+  hd->h[0] = U64_C(0x8c3d37c819544da2);
+  hd->h[1] = U64_C(0x73e1996689dcd4d6);
+  hd->h[2] = U64_C(0x1dfab7ae32ff9c82);
+  hd->h[3] = U64_C(0x679dd514582f9fcf);
+  hd->h[4] = U64_C(0x0f6d2b697bd44da8);
+  hd->h[5] = U64_C(0x77e36f7304c48942);
+  hd->h[6] = U64_C(0x3f9d85a86a1d36c8);
+  hd->h[7] = U64_C(0x1112e6ad91d692a1);
 
   sha512_init_common (ctx, flags);
 }
@@ -543,14 +543,14 @@ do_transform_generic (void *context, const unsigned char *data, size_t nblks)
       int t;
 
       /* get values from the chaining vars */
-      a = hd->h0;
-      b = hd->h1;
-      c = hd->h2;
-      d = hd->h3;
-      e = hd->h4;
-      f = hd->h5;
-      g = hd->h6;
-      h = hd->h7;
+      a = hd->h[0];
+      b = hd->h[1];
+      c = hd->h[2];
+      d = hd->h[3];
+      e = hd->h[4];
+      f = hd->h[5];
+      g = hd->h[6];
+      h = hd->h[7];
 
       for ( t = 0; t < 16; t++ )
         w[t] = buf_get_be64(data + t * 8);
@@ -749,14 +749,14 @@ do_transform_generic (void *context, const unsigned char *data, size_t nblks)
         }
 
       /* Update chaining vars.  */
-      hd->h0 += a;
-      hd->h1 += b;
-      hd->h2 += c;
-      hd->h3 += d;
-      hd->h4 += e;
-      hd->h5 += f;
-      hd->h6 += g;
-      hd->h7 += h;
+      hd->h[0] += a;
+      hd->h[1] += b;
+      hd->h[2] += c;
+      hd->h[3] += d;
+      hd->h[4] += e;
+      hd->h[5] += f;
+      hd->h[6] += g;
+      hd->h[7] += h;
 
       data += 128;
     }
@@ -836,7 +836,7 @@ sha512_final (void *context)
     }
 
   p = hd->bctx.buf;
-#define X(a) do { buf_put_be64(p, hd->state.h##a); p += 8; } while (0)
+#define X(a) do { buf_put_be64(p, hd->state.h[a]); p += 8; } while (0)
   X (0);
   X (1);
   X (2);
-- 
2.32.0


From jussi.kivilinna at iki.fi  Mon Oct 25 20:02:14 2021
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Mon, 25 Oct 2021 21:02:14 +0300
Subject: [PATCH 3/3] tests/bench-slope: avoid divide by zero
In-Reply-To: <20211025180214.1970358-1-jussi.kivilinna@iki.fi>
References: <20211025180214.1970358-1-jussi.kivilinna@iki.fi>
Message-ID: <20211025180214.1970358-3-jussi.kivilinna@iki.fi>

* tests/bench-slope.c (safe_div): New.
(get_slope): Make static; Skip if number of points is too small; Use
safe_div.
(do_slope_benchmark): Retry benchmark if result does not make sense;
Limit retries to 4 for non-auto-ghz and 1000 for auto-ghz.
(get_auto_ghz, do_slope_benchmark, bench_print_result_csv)
(bench_print_result_std): Use safe_div.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 tests/bench-slope.c | 64 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 15 deletions(-)

diff --git a/tests/bench-slope.c b/tests/bench-slope.c
index 1723899c..7a17cf10 100644
--- a/tests/bench-slope.c
+++ b/tests/bench-slope.c
@@ -23,7 +23,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdarg.h>
+#include <string.h>
 #include <assert.h>
+#include <float.h>
 #include <time.h>
 #ifdef _WIN32
 #include <windows.h>
@@ -253,7 +255,28 @@ struct bench_ops
 };
 
 
-double
+static double
+safe_div (double x, double y)
+{
+  union
+  {
+    double d;
+    char buf[sizeof(double)];
+  } u_neg_zero, u_y;
+
+  if (y != 0)
+    return x / y;
+
+  u_neg_zero.d = -0.0;
+  u_y.d = y;
+  if (memcmp(u_neg_zero.buf, u_y.buf, sizeof(double)) == 0)
+    return -DBL_MAX;
+
+  return DBL_MAX;
+}
+
+
+static double
 get_slope (double (*const get_x) (unsigned int idx, void *priv),
 	   void *get_x_priv, double y_points[], unsigned int npoints,
 	   double *overhead)
@@ -264,12 +287,18 @@ get_slope (double (*const get_x) (unsigned int idx, void *priv),
 
   sumx = sumy = sumx2 = sumy2 = sumxy = 0;
 
+  if (npoints <= 1)
+    {
+      /* No slope with zero or one point. */
+      return 0;
+    }
+
   for (i = 0; i < npoints; i++)
     {
       double x, y;
 
       x = get_x (i, get_x_priv);	/* bytes */
-      y = y_points[i];		/* nsecs */
+      y = y_points[i];			/* nsecs */
 
       sumx += x;
       sumy += y;
@@ -278,11 +307,13 @@ get_slope (double (*const get_x) (unsigned int idx, void *priv),
       sumxy += x * y;
     }
 
-  b = (npoints * sumxy - sumx * sumy) / (npoints * sumx2 - sumx * sumx);
-  a = (sumy - b * sumx) / npoints;
+  b = safe_div(npoints * sumxy - sumx * sumy, npoints * sumx2 - sumx * sumx);
 
   if (overhead)
-    *overhead = a;		/* nsecs */
+    {
+      a = safe_div(sumy - b * sumx, npoints);
+      *overhead = a;		/* nsecs */
+    }
 
   return b;			/* nsecs per byte */
 }
@@ -590,20 +621,25 @@ get_auto_ghz (void)
 
   /* Adjust CPU Ghz so that cycles per iteration would give '1024.0'. */
 
-  return cpu_ghz * 1024 / cycles_per_iteration;
+  return safe_div(cpu_ghz * 1024, cycles_per_iteration);
 }
 
 
 double
 do_slope_benchmark (struct bench_obj *obj)
 {
+  unsigned int try_count = 0;
   double ret;
 
   if (!auto_ghz)
     {
       /* Perform measurement without autodetection of CPU frequency. */
 
-      ret = slope_benchmark (obj);
+      do
+        {
+	  ret = slope_benchmark (obj);
+        }
+      while (ret <= 0 && try_count++ <= 4);
 
       bench_ghz = cpu_ghz;
       bench_ghz_diff = 0;
@@ -615,7 +651,6 @@ do_slope_benchmark (struct bench_obj *obj)
       double cpu_auto_ghz_after;
       double nsecs_per_iteration;
       double diff;
-      unsigned int try_count = 0;
 
       /* Perform measurement with CPU frequency autodetection. */
 
@@ -623,12 +658,10 @@ do_slope_benchmark (struct bench_obj *obj)
         {
           /* Repeat measurement until CPU turbo frequency has stabilized. */
 
-	  if (try_count++ > 4)
+	  if ((++try_count % 4) == 0)
 	    {
 	      /* Too much frequency instability on the system, relax target
 	       * accuracy. */
-
-	      try_count = 0;
 	      target_diff *= 2;
 	    }
 
@@ -638,10 +671,11 @@ do_slope_benchmark (struct bench_obj *obj)
 
           cpu_auto_ghz_after = get_auto_ghz ();
 
-          diff = 1.0 - (cpu_auto_ghz_before / cpu_auto_ghz_after);
+          diff = 1.0 - safe_div(cpu_auto_ghz_before, cpu_auto_ghz_after);
           diff = diff < 0 ? -diff : diff;
         }
-      while (diff > target_diff);
+      while ((nsecs_per_iteration <= 0 || diff > target_diff)
+	     && try_count < 1000);
 
       ret = nsecs_per_iteration;
 
@@ -702,7 +736,7 @@ bench_print_result_csv (double nsecs_per_byte)
     }
 
   mbytes_per_sec =
-    (1000.0 * 1000.0 * 1000.0) / (nsecs_per_byte * 1024 * 1024);
+      safe_div(1000.0 * 1000.0 * 1000.0, nsecs_per_byte * 1024 * 1024);
   double_to_str (mbpsec_buf, sizeof (mbpsec_buf), mbytes_per_sec);
 
   /* We print two empty fields to allow for future enhancements.  */
@@ -763,7 +797,7 @@ bench_print_result_std (double nsecs_per_byte)
     }
 
   mbytes_per_sec =
-    (1000.0 * 1000.0 * 1000.0) / (nsecs_per_byte * 1024 * 1024);
+      safe_div(1000.0 * 1000.0 * 1000.0, nsecs_per_byte * 1024 * 1024);
   double_to_str (mbpsec_buf, sizeof (mbpsec_buf), mbytes_per_sec);
 
   if (auto_ghz)
-- 
2.32.0


From jussi.kivilinna at iki.fi  Mon Oct 25 20:02:13 2021
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Mon, 25 Oct 2021 21:02:13 +0300
Subject: [PATCH 2/3] md: clear context with wipememory to avoid false warning
In-Reply-To: <20211025180214.1970358-1-jussi.kivilinna@iki.fi>
References: <20211025180214.1970358-1-jussi.kivilinna@iki.fi>
Message-ID: <20211025180214.1970358-2-jussi.kivilinna@iki.fi>

* cipher/md.c (md_open): Initialize ctx with wipememory2 instead
of memset.
--

GCC 11 gives the following warning on use of memset:
 warning: '__builtin_memset' offset [0, 19] is out of the bounds [0, 0] [-Warray-bounds]

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/md.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cipher/md.c b/cipher/md.c
index 428959b2..64dc235a 100644
--- a/cipher/md.c
+++ b/cipher/md.c
@@ -493,7 +493,7 @@ md_open (gcry_md_hd_t *h, int algo, unsigned int flags)
       hd->bufpos = 0;
 
       /* Initialize the private data. */
-      memset (ctx, 0, sizeof *ctx);
+      wipememory2 (ctx, 0, sizeof *ctx);
       ctx->magic = secure ? CTX_MAGIC_SECURE : CTX_MAGIC_NORMAL;
       ctx->actual_handle_size = n + sizeof (struct gcry_md_context);
       ctx->flags.secure = secure;
-- 
2.32.0