From jussi.kivilinna at iki.fi  Sun Mar 11 16:10:49 2018
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Sun, 11 Mar 2018 17:10:49 +0200
Subject: [PATCH 1/2] Improve constant-time buffer compare
Message-ID: <152078104967.30895.5002712088528772387.stgit@localhost.localdomain>

* cipher/bufhelp.h (buf_eq_const): Rewrite logic.
--

New implementation for constant-time buffer comparison that avoids
generating conditional code in the comparison loop.

Signed-off-by: Jussi Kivilinna
---
 0 files changed

diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h
index b854bc016..83d3f53aa 100644
--- a/cipher/bufhelp.h
+++ b/cipher/bufhelp.h
@@ -290,13 +290,19 @@ buf_eq_const(const void *_a, const void *_b, size_t len)
 {
   const byte *a = _a;
   const byte *b = _b;
-  size_t diff, i;
+  int ab, ba;
+  size_t i;
 
   /* Constant-time compare. */
-  for (i = 0, diff = 0; i < len; i++)
-    diff -= !!(a[i] - b[i]);
+  for (i = 0, ab = 0, ba = 0; i < len; i++)
+    {
+      /* If a[i] != b[i], either ab or ba will be negative. */
+      ab |= a[i] - b[i];
+      ba |= b[i] - a[i];
+    }
 
-  return !diff;
+  /* 'ab | ba' is negative when buffers are not equal. */
+  return (ab | ba) >= 0;
 }

From jussi.kivilinna at iki.fi  Sun Mar 11 16:10:54 2018
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Sun, 11 Mar 2018 17:10:54 +0200
Subject: [PATCH 2/2] _gcry_burn_stack: use memset for clearing memory
In-Reply-To: <152078104967.30895.5002712088528772387.stgit@localhost.localdomain>
References: <152078104967.30895.5002712088528772387.stgit@localhost.localdomain>
Message-ID: <152078105472.30895.16990019070381191769.stgit@localhost.localdomain>

* src/misc.c (__gcry_burn_stack) [HAVE_VLA]: Use 'memset' for clearing
  stack.
--

Patch switches stack burning to use the faster memset instead of
wipememory. Memset is accessed through a volatile function pointer, so
that the compiler will not optimize away the call.

Signed-off-by: Jussi Kivilinna
---
 0 files changed

diff --git a/src/misc.c b/src/misc.c
index 002a84f69..47d2dc712 100644
--- a/src/misc.c
+++ b/src/misc.c
@@ -501,11 +501,12 @@ void
 __gcry_burn_stack (unsigned int bytes)
 {
 #ifdef HAVE_VLA
+  static void *(*volatile memset_ptr)(void *, int, size_t) = (void *)memset;
   /* (bytes == 0 ? 1 : bytes) == (!bytes + bytes) */
   unsigned int buflen = ((!bytes + bytes) + 63) & ~63;
-  volatile char buf[buflen];
+  char buf[buflen];
 
-  wipememory (buf, sizeof buf);
+  memset_ptr (buf, 0, sizeof buf);
 #else
   volatile char buf[64];

From jussi.kivilinna at iki.fi  Sun Mar 11 16:11:12 2018
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Sun, 11 Mar 2018 17:11:12 +0200
Subject: [PATCH] bench-slope: add CPU frequency auto-detection
Message-ID: <152078107280.31205.13929557969371133343.stgit@localhost.localdomain>

* tests/bench-slope.c (bench_obj): Add 'hd'.
(bench_encrypt_init, bench_encrypt_free, bench_encrypt_do_bench)
(bench_decrypt_do_bench, bench_xts_encrypt_init)
(bench_xts_encrypt_do_bench, bench_xts_decrypt_do_bench)
(bench_ccm_encrypt_init, bench_ccm_encrypt_do_bench)
(bench_ccm_decrypt_do_bench, bench_aead_encrypt_init)
(bench_aead_encrypt_do_bench, bench_aead_decrypt_do_bench)
(bench_hash_init, bench_hash_free, bench_hash_do_bench)
(bench_mac_init, bench_mac_free, bench_mac_do_bench): Use 'obj->hd'
for storing pointer to crypto context.
(auto_ghz): New.
(do_slope_benchmark): Rename to...
(slope_benchmark): ...this.
(auto_ghz_init, auto_ghz_free, auto_ghz_bench, auto_ghz_detect_ops)
(get_auto_ghz, do_slope_benchmark): New.
(double_to_str): Round number larger than 1000 to integer.
(bench_print_result_csv, bench_print_result_std) (bench_print_result, bench_print_header, cipher_bench_one) (hash_bench_one, mac_bench_one, kdf_bench_one, kdf_bench): Add auto-detected frequency printing. (print_help): Help for CPU speed auto-detection mode. (main): Add parsing for "--cpu-mhz auto". -- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/tests/bench-slope.c b/tests/bench-slope.c index e34104f7b..5c64f229d 100644 --- a/tests/bench-slope.c +++ b/tests/bench-slope.c @@ -50,6 +50,9 @@ static int num_measurement_repetitions; results. */ static double cpu_ghz = -1; +/* Attempt to autodetect CPU Ghz. */ +static int auto_ghz; + /* Whether we are running as part of the regression test suite. */ static int in_regression_test; @@ -220,6 +223,7 @@ struct bench_obj unsigned int step_size; void *priv; + void *hd; }; typedef int (*const bench_initialize_t) (struct bench_obj * obj); @@ -383,7 +387,7 @@ adjust_loop_iterations_to_timer_accuracy (struct bench_obj *obj, void *buffer, /* Benchmark and return linear regression slope in nanoseconds per byte. */ double -do_slope_benchmark (struct bench_obj *obj) +slope_benchmark (struct bench_obj *obj) { unsigned int num_measurements; double *measurements = NULL; @@ -464,6 +468,122 @@ err_free: return -1; } +/********************************************* CPU frequency auto-detection. */ + +static int +auto_ghz_init (struct bench_obj *obj) +{ + obj->min_bufsize = 16; + obj->max_bufsize = 64 + obj->min_bufsize; + obj->step_size = 8; + obj->num_measure_repetitions = 16; + + return 0; +} + +static void +auto_ghz_free (struct bench_obj *obj) +{ + (void)obj; +} + +static void +auto_ghz_bench (struct bench_obj *obj, void *buf, size_t buflen) +{ + (void)obj; + (void)buf; + + buflen *= 1024; + + /* Turbo frequency detection benchmark. Without CPU turbo-boost, this + * function will give cycles/iteration result 1024.0 on high-end CPUs. + * With turbo, result will be less and can be used detect turbo-clock. */ + + do + { +#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY + /* Use memory barrier to prevent compiler from optimizing this loop + * away. */ + + asm volatile ("":::"memory"); +#else + /* TODO: Needs alternative way. */ +#endif + } + while (--buflen); +} + +static struct bench_ops auto_ghz_detect_ops = { + &auto_ghz_init, + &auto_ghz_free, + &auto_ghz_bench +}; + + +double +get_auto_ghz (void) +{ + struct bench_obj obj = { 0 }; + double nsecs_per_iteration; + double cycles_per_iteration; + + obj.ops = &auto_ghz_detect_ops; + + nsecs_per_iteration = slope_benchmark (&obj); + + cycles_per_iteration = nsecs_per_iteration * cpu_ghz; + + /* Adjust CPU Ghz so that cycles per iteration would give '1024.0'. */ + + return cpu_ghz * 1024 / cycles_per_iteration; +} + + +double +do_slope_benchmark (struct bench_obj *obj, double *bench_ghz) +{ + double ret; + + if (!auto_ghz) + { + /* Perform measurement without autodetection of CPU frequency. */ + + ret = slope_benchmark (obj); + + *bench_ghz = cpu_ghz; + } + else + { + double cpu_auto_ghz_before; + double cpu_auto_ghz_after; + double nsecs_per_iteration; + double diff; + + /* Perform measurement with CPU frequency autodetection. */ + + do + { + /* Repeat measurement until CPU turbo frequency has stabilized. */ + + cpu_auto_ghz_before = get_auto_ghz (); + + nsecs_per_iteration = slope_benchmark (obj); + + cpu_auto_ghz_after = get_auto_ghz (); + + diff = 1.0 - (cpu_auto_ghz_before / cpu_auto_ghz_after); + diff = diff < 0 ? 
-diff : diff; + } + while (diff > 5e-5); + + ret = nsecs_per_iteration; + + *bench_ghz = cpu_auto_ghz_after; + } + + return ret; +} + /********************************************************** Printing results. */ @@ -476,29 +596,34 @@ double_to_str (char *out, size_t outlen, double value) fmt = "%.3f"; else if (value < 100.0) fmt = "%.2f"; - else + else if (value < 1000.0) fmt = "%.1f"; + else + fmt = "%.0f"; snprintf (out, outlen, fmt, value); } static void -bench_print_result_csv (double nsecs_per_byte) +bench_print_result_csv (double nsecs_per_byte, double bench_ghz) { double cycles_per_byte, mbytes_per_sec; char nsecpbyte_buf[16]; char mbpsec_buf[16]; char cpbyte_buf[16]; + char mhz_buf[16]; *cpbyte_buf = 0; + *mhz_buf = 0; double_to_str (nsecpbyte_buf, sizeof (nsecpbyte_buf), nsecs_per_byte); /* If user didn't provide CPU speed, we cannot show cycles/byte results. */ - if (cpu_ghz > 0.0) + if (bench_ghz > 0.0) { - cycles_per_byte = nsecs_per_byte * cpu_ghz; + cycles_per_byte = nsecs_per_byte * bench_ghz; double_to_str (cpbyte_buf, sizeof (cpbyte_buf), cycles_per_byte); + double_to_str (mhz_buf, sizeof (mhz_buf), bench_ghz * 1000); } mbytes_per_sec = @@ -506,50 +631,76 @@ bench_print_result_csv (double nsecs_per_byte) double_to_str (mbpsec_buf, sizeof (mbpsec_buf), mbytes_per_sec); /* We print two empty fields to allow for future enhancements. */ - printf ("%s,%s,%s,,,%s,ns/B,%s,MiB/s,%s,c/B\n", - current_section_name, - current_algo_name? current_algo_name : "", - current_mode_name? current_mode_name : "", - nsecpbyte_buf, - mbpsec_buf, - cpbyte_buf); - + if (auto_ghz) + { + printf ("%s,%s,%s,,,%s,ns/B,%s,MiB/s,%s,c/B,%s,Mhz\n", + current_section_name, + current_algo_name? current_algo_name : "", + current_mode_name? current_mode_name : "", + nsecpbyte_buf, + mbpsec_buf, + cpbyte_buf, + mhz_buf); + } + else + { + printf ("%s,%s,%s,,,%s,ns/B,%s,MiB/s,%s,c/B\n", + current_section_name, + current_algo_name? current_algo_name : "", + current_mode_name? current_mode_name : "", + nsecpbyte_buf, + mbpsec_buf, + cpbyte_buf); + } } static void -bench_print_result_std (double nsecs_per_byte) +bench_print_result_std (double nsecs_per_byte, double bench_ghz) { double cycles_per_byte, mbytes_per_sec; char nsecpbyte_buf[16]; char mbpsec_buf[16]; char cpbyte_buf[16]; + char mhz_buf[16]; double_to_str (nsecpbyte_buf, sizeof (nsecpbyte_buf), nsecs_per_byte); /* If user didn't provide CPU speed, we cannot show cycles/byte results. 
*/ - if (cpu_ghz > 0.0) + if (bench_ghz > 0.0) { - cycles_per_byte = nsecs_per_byte * cpu_ghz; + cycles_per_byte = nsecs_per_byte * bench_ghz; double_to_str (cpbyte_buf, sizeof (cpbyte_buf), cycles_per_byte); + double_to_str (mhz_buf, sizeof (mhz_buf), bench_ghz * 1000); } else - strcpy (cpbyte_buf, "-"); + { + strcpy (cpbyte_buf, "-"); + strcpy (mhz_buf, "-"); + } mbytes_per_sec = (1000.0 * 1000.0 * 1000.0) / (nsecs_per_byte * 1024 * 1024); double_to_str (mbpsec_buf, sizeof (mbpsec_buf), mbytes_per_sec); - printf ("%9s ns/B %9s MiB/s %9s c/B\n", - nsecpbyte_buf, mbpsec_buf, cpbyte_buf); + if (auto_ghz) + { + printf ("%9s ns/B %9s MiB/s %9s c/B %9s\n", + nsecpbyte_buf, mbpsec_buf, cpbyte_buf, mhz_buf); + } + else + { + printf ("%9s ns/B %9s MiB/s %9s c/B\n", + nsecpbyte_buf, mbpsec_buf, cpbyte_buf); + } } static void -bench_print_result (double nsecs_per_byte) +bench_print_result (double nsecs_per_byte, double bench_ghz) { if (csv_mode) - bench_print_result_csv (nsecs_per_byte); + bench_print_result_csv (nsecs_per_byte, bench_ghz); else - bench_print_result_std (nsecs_per_byte); + bench_print_result_std (nsecs_per_byte, bench_ghz); } static void @@ -578,8 +729,13 @@ bench_print_header (int algo_width, const char *algo_name) printf (" %-*s | ", -algo_width, algo_name); else printf (" %-*s | ", algo_width, algo_name); - printf ("%14s %15s %13s\n", "nanosecs/byte", "mebibytes/sec", - "cycles/byte"); + + if (auto_ghz) + printf ("%14s %15s %13s %9s\n", "nanosecs/byte", "mebibytes/sec", + "cycles/byte", "auto Mhz"); + else + printf ("%14s %15s %13s\n", "nanosecs/byte", "mebibytes/sec", + "cycles/byte"); } } @@ -684,7 +840,7 @@ bench_encrypt_init (struct bench_obj *obj) exit (1); } - obj->priv = hd; + obj->hd = hd; return 0; } @@ -692,7 +848,7 @@ bench_encrypt_init (struct bench_obj *obj) static void bench_encrypt_free (struct bench_obj *obj) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; gcry_cipher_close (hd); } @@ -700,7 +856,7 @@ bench_encrypt_free (struct bench_obj *obj) static void bench_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; err = gcry_cipher_encrypt (hd, buf, buflen, buf, buflen); @@ -716,7 +872,7 @@ bench_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) static void bench_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; err = gcry_cipher_decrypt (hd, buf, buflen, buf, buflen); @@ -790,7 +946,7 @@ bench_xts_encrypt_init (struct bench_obj *obj) exit (1); } - obj->priv = hd; + obj->hd = hd; return 0; } @@ -798,7 +954,7 @@ bench_xts_encrypt_init (struct bench_obj *obj) static void bench_xts_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; unsigned int pos; static const char tweak[16] = { 0xff, 0xff, 0xfe, }; size_t sectorlen = obj->step_size; @@ -825,7 +981,7 @@ bench_xts_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) static void bench_xts_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; unsigned int pos; static const char tweak[16] = { 0xff, 0xff, 0xfe, }; size_t sectorlen = obj->step_size; @@ -865,7 +1021,7 @@ static struct bench_ops xts_decrypt_ops = { static void bench_ccm_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - 
gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[8]; char nonce[11] = { 0x80, 0x01, }; @@ -909,7 +1065,7 @@ bench_ccm_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) static void bench_ccm_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[8] = { 0, }; char nonce[11] = { 0x80, 0x01, }; @@ -956,7 +1112,7 @@ static void bench_ccm_authenticate_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[8] = { 0, }; char nonce[11] = { 0x80, 0x01, }; @@ -1030,7 +1186,7 @@ static void bench_aead_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen, const char *nonce, size_t noncelen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[16]; @@ -1060,7 +1216,7 @@ static void bench_aead_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen, const char *nonce, size_t noncelen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[16] = { 0, }; @@ -1093,7 +1249,7 @@ bench_aead_authenticate_do_bench (struct bench_obj *obj, void *buf, size_t buflen, const char *nonce, size_t noncelen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[16] = { 0, }; char data = 0xff; @@ -1360,6 +1516,7 @@ cipher_bench_one (int algo, struct bench_cipher_mode *pmode) struct bench_cipher_mode mode = *pmode; struct bench_obj obj = { 0 }; double result; + double bench_ghz; unsigned int blklen; mode.algo = algo; @@ -1404,9 +1561,9 @@ cipher_bench_one (int algo, struct bench_cipher_mode *pmode) obj.ops = mode.ops; obj.priv = &mode; - result = do_slope_benchmark (&obj); + result = do_slope_benchmark (&obj, &bench_ghz); - bench_print_result (result); + bench_print_result (result, bench_ghz); } @@ -1483,7 +1640,7 @@ bench_hash_init (struct bench_obj *obj) exit (1); } - obj->priv = hd; + obj->hd = hd; return 0; } @@ -1491,7 +1648,7 @@ bench_hash_init (struct bench_obj *obj) static void bench_hash_free (struct bench_obj *obj) { - gcry_md_hd_t hd = obj->priv; + gcry_md_hd_t hd = obj->hd; gcry_md_close (hd); } @@ -1499,7 +1656,7 @@ bench_hash_free (struct bench_obj *obj) static void bench_hash_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_md_hd_t hd = obj->priv; + gcry_md_hd_t hd = obj->hd; gcry_md_reset (hd); gcry_md_write (hd, buf, buflen); @@ -1524,6 +1681,7 @@ hash_bench_one (int algo, struct bench_hash_mode *pmode) { struct bench_hash_mode mode = *pmode; struct bench_obj obj = { 0 }; + double bench_ghz; double result; mode.algo = algo; @@ -1536,9 +1694,9 @@ hash_bench_one (int algo, struct bench_hash_mode *pmode) obj.ops = mode.ops; obj.priv = &mode; - result = do_slope_benchmark (&obj); + result = do_slope_benchmark (&obj, &bench_ghz); - bench_print_result (result); + bench_print_result (result, bench_ghz); } static void @@ -1645,7 +1803,7 @@ bench_mac_init (struct bench_obj *obj) break; } - obj->priv = hd; + obj->hd = hd; free (key); return 0; @@ -1654,7 +1812,7 @@ bench_mac_init (struct bench_obj *obj) static void bench_mac_free (struct bench_obj *obj) { - gcry_mac_hd_t hd = obj->priv; + gcry_mac_hd_t hd = obj->hd; gcry_mac_close (hd); } @@ -1662,7 +1820,7 @@ bench_mac_free (struct bench_obj *obj) static void bench_mac_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_mac_hd_t hd = obj->priv; + gcry_mac_hd_t hd = 
obj->hd; size_t bs; char b; @@ -1690,6 +1848,7 @@ mac_bench_one (int algo, struct bench_mac_mode *pmode) { struct bench_mac_mode mode = *pmode; struct bench_obj obj = { 0 }; + double bench_ghz; double result; mode.algo = algo; @@ -1702,9 +1861,9 @@ mac_bench_one (int algo, struct bench_mac_mode *pmode) obj.ops = mode.ops; obj.priv = &mode; - result = do_slope_benchmark (&obj); + result = do_slope_benchmark (&obj, &bench_ghz); - bench_print_result (result); + bench_print_result (result, bench_ghz); } static void @@ -1807,9 +1966,11 @@ kdf_bench_one (int algo, int subalgo) struct bench_obj obj = { 0 }; double nsecs_per_iteration; double cycles_per_iteration; + double bench_ghz; char algo_name[32]; char nsecpiter_buf[16]; char cpiter_buf[16]; + char mhz_buf[16]; mode.algo = algo; mode.subalgo = subalgo; @@ -1843,31 +2004,45 @@ kdf_bench_one (int algo, int subalgo) obj.ops = mode.ops; obj.priv = &mode; - nsecs_per_iteration = do_slope_benchmark (&obj); + nsecs_per_iteration = do_slope_benchmark (&obj, &bench_ghz); strcpy(cpiter_buf, csv_mode ? "" : "-"); + strcpy(mhz_buf, csv_mode ? "" : "-"); double_to_str (nsecpiter_buf, sizeof (nsecpiter_buf), nsecs_per_iteration); /* If user didn't provide CPU speed, we cannot show cycles/iter results. */ - if (cpu_ghz > 0.0) + if (bench_ghz > 0.0) { - cycles_per_iteration = nsecs_per_iteration * cpu_ghz; + cycles_per_iteration = nsecs_per_iteration * bench_ghz; double_to_str (cpiter_buf, sizeof (cpiter_buf), cycles_per_iteration); + double_to_str (mhz_buf, sizeof (mhz_buf), bench_ghz * 1000); } if (csv_mode) { - printf ("%s,%s,%s,,,,,,,,,%s,ns/iter,%s,c/iter\n", - current_section_name, - current_algo_name ? current_algo_name : "", - current_mode_name ? current_mode_name : "", - nsecpiter_buf, - cpiter_buf); + if (auto_ghz) + printf ("%s,%s,%s,,,,,,,,,%s,ns/iter,%s,c/iter,%s,Mhz\n", + current_section_name, + current_algo_name ? current_algo_name : "", + current_mode_name ? current_mode_name : "", + nsecpiter_buf, + cpiter_buf, + mhz_buf); + else + printf ("%s,%s,%s,,,,,,,,,%s,ns/iter,%s,c/iter\n", + current_section_name, + current_algo_name ? current_algo_name : "", + current_mode_name ? current_mode_name : "", + nsecpiter_buf, + cpiter_buf); } else { - printf ("%14s %13s\n", nsecpiter_buf, cpiter_buf); + if (auto_ghz) + printf ("%14s %13s %9s\n", nsecpiter_buf, cpiter_buf, mhz_buf); + else + printf ("%14s %13s\n", nsecpiter_buf, cpiter_buf); } } @@ -1882,7 +2057,10 @@ kdf_bench (char **argv, int argc) if (!csv_mode) { printf (" %-*s | ", 24, ""); - printf ("%14s %13s\n", "nanosecs/iter", "cycles/iter"); + if (auto_ghz) + printf ("%14s %13s %9s\n", "nanosecs/iter", "cycles/iter", "auto Mhz"); + else + printf ("%14s %13s\n", "nanosecs/iter", "cycles/iter"); } if (argv && argc) @@ -1923,7 +2101,8 @@ print_help (void) "", " options:", " --cpu-mhz Set CPU speed for calculating cycles", - " per bytes results.", + " per bytes results. 
Set as \"auto\"", + " for auto-detection of CPU speed.", " --disable-hwf Disable hardware acceleration feature(s)", " for benchmarking.", " --repetitions Use N repetitions (default " @@ -2039,8 +2218,15 @@ main (int argc, char **argv) argv++; if (argc) { - cpu_ghz = atof (*argv); - cpu_ghz /= 1000; /* Mhz => Ghz */ + if (!strcmp (*argv, "auto")) + { + auto_ghz = 1; + } + else + { + cpu_ghz = atof (*argv); + cpu_ghz /= 1000; /* Mhz => Ghz */ + } argc--; argv++; From smueller at chronox.de Mon Mar 12 08:45:59 2018 From: smueller at chronox.de (Stephan Mueller) Date: Mon, 12 Mar 2018 08:45:59 +0100 Subject: SP800-38F AES kw encryption broken? Message-ID: <4386046.jcxHWR5DvT@tauon.chronox.de> Hi, The function _gcry_cipher_aeswrap_encrypt seems to be broken as it does not produce the expected ciphertext. When I invoke the encryption operation with the following data key = f59782f1dceb0544a8da06b34969b9212b55ce6dcbdd0975a33f4b3f88b538da plain = 73d33060b5f9f2eb5785c0703ddfa704 I obtain the following: ciphertext = 888268c16299bc292154bd5ee49a27a521d3299e02eff900 However, when I decrypt the ciphertext immediately following the encryption operation, I get the following plain = a6a6a6a6a6a6a6a65785c0703ddfa704 This seems to indicate that the first semiblock of the plaintext does not seem to be used at all but rather is replaced with the default IV. The decryption function works as expected. Unfortunately the current git repo cannot be compiled ("cannot find mpi- internal.h", no rule to generate chacha20-sse2-amd64.S) so I cannot debug the code. Ciao Stephan From smueller at chronox.de Mon Mar 12 22:24:37 2018 From: smueller at chronox.de (Stephan Mueller) Date: Mon, 12 Mar 2018 22:24:37 +0100 Subject: [PATCH] AES-KW: fix in-place encryption In-Reply-To: <4386046.jcxHWR5DvT@tauon.chronox.de> References: <4386046.jcxHWR5DvT@tauon.chronox.de> Message-ID: <10870564.SVcAGkk7tg@tauon.chronox.de> In case AES-KW in-place encryption is performed, the plaintext must be moved to the correct destination location before the first semiblock of the destination buffer is modified. Without the patch, the first semiblock of the plaintext is overwritten with a6a6a6a6a6a6a6a6. * cipher/cipher-aeswrap.c: move memove call before KW IV setting Signed-off-by: Stephan Mueller --- cipher/cipher-aeswrap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cipher/cipher-aeswrap.c b/cipher/cipher-aeswrap.c index 698742df..a8d0e03e 100644 --- a/cipher/cipher-aeswrap.c +++ b/cipher/cipher-aeswrap.c @@ -70,6 +70,9 @@ _gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c, a = outbuf; /* We store A directly in OUTBUF. */ b = c->u_ctr.ctr; /* B is also used to concatenate stuff. */ + /* Copy the inbuf to the outbuf. */ + memmove (r+8, inbuf, inbuflen); + /* If an IV has been set we use that IV as the Alternative Initial Value; if it has not been set we use the standard value. */ if (c->marks.iv) @@ -77,9 +80,6 @@ _gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c, else memset (a, 0xa6, 8); - /* Copy the inbuf to the outbuf. */ - memmove (r+8, inbuf, inbuflen); - memset (t, 0, sizeof t); /* t := 0. 
*/ for (j = 0; j <= 5; j++) -- 2.14.3 From martin at martin.st Thu Mar 22 09:56:14 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 10:56:14 +0200 Subject: [PATCH 4/5] aarch64: camellia: Only use the lower 32 bit of an int parameter In-Reply-To: <1521708975-30902-1-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: <1521708975-30902-4-git-send-email-martin@martin.st> The keybits parameter is declared as int, and in those cases, the upper half of a register is undefined, not guaranteed to be zero. Signed-off-by: Martin Storsj? --- I didn't check other files and functions for the same issue, I just happened to notice this one while looking closer at the camellia source file. >From previous experience, clang can be pretty aggressive with passing in undefined data in the upper half of registers, where it isn't supposed to make any difference. --- cipher/camellia-aarch64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 440f69f..68d2a7d 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -33,7 +33,7 @@ #define CTX x0 #define RDST x1 #define RSRC x2 -#define RKEYBITS x3 +#define RKEYBITS w3 #define RTAB1 x4 #define RTAB2 x5 -- 2.7.4 From martin at martin.st Thu Mar 22 09:56:15 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 10:56:15 +0200 Subject: [PATCH 5/5] aarch64: Enable building the aarch64 cipher assembly for windows In-Reply-To: <1521708975-30902-1-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: <1521708975-30902-5-git-send-email-martin@martin.st> Don't require .type and .size in configure; we can make them optional via a preprocessor macro. This is mostly a mechanical change, wrapping the .type and .size directives in an ELF() macro, with two actual manual changes (when targeting windows): - Don't load global symbols via a GOT (in chacha20) - Don't use the x18 register (in camellia); back up and restore x19 in the prologue/epilogue and use that instead (on windows only). x18 is a platform specific register; on linux, it's free to be used by user code, while it's reserved for platform use on windows and darwin. Signed-off-by: Martin Storsj? --- This isn't strictly necessary for building libgcrypt for windows on aarch64; previously configure concludes that the assembly can't be built since the .type and .size directives don't work. This just allows using more of the existing assembly routines. This also probably has the effect that the same assembly gets enabled when targeting darwin (iOS), but building with assembly enabled doesn't work for darwin anyway (even prior to this change, since darwin requires an extra leading underscore on all symbols, while the mpi/aarch64 code gets automatically enabled). 
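To make the mechanics concrete before the diff: the whole patch rests on
one variadic preprocessor macro. The sketch below is illustrative only
(hypothetical file and symbol names, not code from the patch); it shows
what the ELF() wrapper introduced in asm-common-aarch64.h expands to on
each target.

/* elf-macro-sketch.h - illustrative only; mirrors the ELF() pattern.
 *
 * On ELF targets the macro passes its arguments through unchanged; on
 * COFF (windows) and Mach-O (darwin) it swallows them, so .type and
 * .size directives simply vanish instead of breaking the assembler. */
#ifdef __ELF__
# define ELF(...) __VA_ARGS__
#else
# define ELF(...) /*_*/
#endif

/* A preprocessed assembly file can then contain:
 *
 *     .globl _gcry_foo
 *     ELF(.type _gcry_foo,%function;)
 *     _gcry_foo:
 *         ret;
 *     ELF(.size _gcry_foo,.-_gcry_foo;)
 *
 * On ELF this expands to the usual directives; elsewhere the wrapped
 * lines disappear, which is why configure no longer needs to probe
 * for .type/.size support. */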
--- cipher/asm-common-aarch64.h | 32 ++++++++++++++++++++ cipher/camellia-aarch64.S | 34 ++++++++++++++++----- cipher/chacha20-aarch64.S | 12 ++++++-- cipher/cipher-gcm-armv8-aarch64-ce.S | 10 +++---- cipher/rijndael-aarch64.S | 10 +++---- cipher/rijndael-armv8-aarch64-ce.S | 58 ++++++++++++++++++------------------ cipher/sha1-armv8-aarch64-ce.S | 6 ++-- cipher/sha256-armv8-aarch64-ce.S | 6 ++-- cipher/twofish-aarch64.S | 10 +++---- configure.ac | 4 --- 10 files changed, 118 insertions(+), 64 deletions(-) create mode 100644 cipher/asm-common-aarch64.h diff --git a/cipher/asm-common-aarch64.h b/cipher/asm-common-aarch64.h new file mode 100644 index 0000000..814b7ad --- /dev/null +++ b/cipher/asm-common-aarch64.h @@ -0,0 +1,32 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsj? + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef GCRY_ASM_COMMON_AARCH64_H +#define GCRY_ASM_COMMON_AARCH64_H + +#include + +#ifdef __ELF__ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* GCRY_ASM_COMMON_AARCH64_H */ diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 68d2a7d..ec4ebef 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -19,11 +19,17 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS +#ifdef _WIN32 +# define WIN(...) __VA_ARGS__ +#else +# define WIN(...) /*_*/ +#endif + .text /* struct camellia_ctx: */ @@ -55,12 +61,21 @@ #define RT0 w15 #define RT1 w16 #define RT2 w17 +#ifdef _WIN32 +#define RT3 w19 +#else #define RT3 w18 +#endif #define xRT0 x15 #define xRT1 x16 #define xRT2 x17 +#ifdef _WIN32 +/* w18/x18 is reserved and can't be used on windows. */ +#define xRT3 x19 +#else #define xRT3 x18 +#endif #ifdef __AARCH64EL__ #define host_to_be(reg, rtmp) \ @@ -198,9 +213,10 @@ str_output_be(RDST, YL, YR, XL, XR, RT0, RT1); .globl _gcry_camellia_arm_encrypt_block -.type _gcry_camellia_arm_encrypt_block, at function; +ELF(.type _gcry_camellia_arm_encrypt_block, at function;) _gcry_camellia_arm_encrypt_block: + WIN(stp x19, x30, [sp, #-16]!) /* input: * x0: keytable * x1: dst @@ -227,6 +243,7 @@ _gcry_camellia_arm_encrypt_block: outunpack(24); + WIN(ldp x19, x30, [sp], #16) ret; .ltorg @@ -236,14 +253,16 @@ _gcry_camellia_arm_encrypt_block: outunpack(32); + WIN(ldp x19, x30, [sp], #16) ret; .ltorg -.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block; +ELF(.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;) .globl _gcry_camellia_arm_decrypt_block -.type _gcry_camellia_arm_decrypt_block, at function; +ELF(.type _gcry_camellia_arm_decrypt_block, at function;) _gcry_camellia_arm_decrypt_block: + WIN(stp x19, x30, [sp, #-16]!) 
/* input: * x0: keytable * x1: dst @@ -271,6 +290,7 @@ _gcry_camellia_arm_decrypt_block: outunpack(0); + WIN(ldp x19, x30, [sp], #16) ret; .ltorg @@ -281,11 +301,11 @@ _gcry_camellia_arm_decrypt_block: b .Ldec_128; .ltorg -.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block; +ELF(.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;) /* Encryption/Decryption tables */ .globl _gcry_camellia_arm_tables -.type _gcry_camellia_arm_tables, at object; +ELF(.type _gcry_camellia_arm_tables, at object;) .balign 32 _gcry_camellia_arm_tables: .Lcamellia_sp1110: @@ -551,7 +571,7 @@ _gcry_camellia_arm_tables: .long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4 .long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7 .long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e -.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables; +ELF(.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 5990a08..3844d4e 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -27,7 +27,7 @@ * Public domain. */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -38,9 +38,15 @@ .text +#ifdef _WIN32 +#define GET_DATA_POINTER(reg, name) \ + adrp reg, name ; \ + add reg, reg, #:lo12:name ; +#else #define GET_DATA_POINTER(reg, name) \ adrp reg, :got:name ; \ ldr reg, [reg, #:got_lo12:name] ; +#endif /* register macros */ #define INPUT x0 @@ -148,7 +154,7 @@ chacha20_data: .align 3 .globl _gcry_chacha20_aarch64_blocks4 -.type _gcry_chacha20_aarch64_blocks4,%function; +ELF(.type _gcry_chacha20_aarch64_blocks4,%function;) _gcry_chacha20_aarch64_blocks4: /* input: @@ -303,6 +309,6 @@ _gcry_chacha20_aarch64_blocks4: eor x0, x0, x0 ret -.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4; +ELF(.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4;) #endif diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S index 0cfaf1c..b6c4f59 100644 --- a/cipher/cipher-gcm-armv8-aarch64-ce.S +++ b/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -174,7 +174,7 @@ gcry_gcm_reduction_constant: */ .align 3 .globl _gcry_ghash_armv8_ce_pmull -.type _gcry_ghash_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_armv8_ce_pmull,%function;) _gcry_ghash_armv8_ce_pmull: /* input: * x0: gcm_key @@ -360,7 +360,7 @@ _gcry_ghash_armv8_ce_pmull: .Ldo_nothing: mov x0, #0 ret -.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull; +ELF(.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull;) /* @@ -368,7 +368,7 @@ _gcry_ghash_armv8_ce_pmull: */ .align 3 .globl _gcry_ghash_setup_armv8_ce_pmull -.type _gcry_ghash_setup_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_setup_armv8_ce_pmull,%function;) _gcry_ghash_setup_armv8_ce_pmull: /* input: * x0: gcm_key @@ -408,6 +408,6 @@ _gcry_ghash_setup_armv8_ce_pmull: st1 {rh5.16b-rh6.16b}, [x1] ret -.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull; +ELF(.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull;) #endif diff --git a/cipher/rijndael-aarch64.S b/cipher/rijndael-aarch64.S index e533bbe..aad7487 100644 --- a/cipher/rijndael-aarch64.S +++ b/cipher/rijndael-aarch64.S @@ -18,7 +18,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -206,7 +206,7 @@ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_encrypt_block -.type _gcry_aes_arm_encrypt_block,%function; +ELF(.type _gcry_aes_arm_encrypt_block,%function;) _gcry_aes_arm_encrypt_block: /* input: @@ -285,7 +285,7 @@ _gcry_aes_arm_encrypt_block: lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD); b .Lenc_done; -.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block; +ELF(.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;) #define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \ @@ -429,7 +429,7 @@ _gcry_aes_arm_encrypt_block: addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_decrypt_block -.type _gcry_aes_arm_decrypt_block,%function; +ELF(.type _gcry_aes_arm_decrypt_block,%function;) _gcry_aes_arm_decrypt_block: /* input: @@ -504,7 +504,7 @@ _gcry_aes_arm_decrypt_block: decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); b .Ldec_tail; -.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block; +ELF(.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__ */ diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S index 40097a7..5859557 100644 --- a/cipher/rijndael-armv8-aarch64-ce.S +++ b/cipher/rijndael-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -239,7 +239,7 @@ */ .align 3 .globl _gcry_aes_enc_armv8_ce -.type _gcry_aes_enc_armv8_ce,%function; +ELF(.type _gcry_aes_enc_armv8_ce,%function;) _gcry_aes_enc_armv8_ce: /* input: * x0: keysched @@ -291,7 +291,7 @@ _gcry_aes_enc_armv8_ce: CLEAR_REG(vk13) CLEAR_REG(vk14) b .Lenc1_tail -.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce; +ELF(.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce;) /* @@ -301,7 +301,7 @@ _gcry_aes_enc_armv8_ce: */ .align 3 .globl _gcry_aes_dec_armv8_ce -.type _gcry_aes_dec_armv8_ce,%function; +ELF(.type _gcry_aes_dec_armv8_ce,%function;) _gcry_aes_dec_armv8_ce: /* input: * x0: keysched @@ -353,7 +353,7 @@ _gcry_aes_dec_armv8_ce: CLEAR_REG(vk13) CLEAR_REG(vk14) b .Ldec1_tail -.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce; +ELF(.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce;) /* @@ -366,7 +366,7 @@ _gcry_aes_dec_armv8_ce: .align 3 .globl _gcry_aes_cbc_enc_armv8_ce -.type _gcry_aes_cbc_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_enc_armv8_ce,%function;) _gcry_aes_cbc_enc_armv8_ce: /* input: * x0: keysched @@ -419,7 +419,7 @@ _gcry_aes_cbc_enc_armv8_ce: .Lcbc_enc_skip: ret -.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce; +ELF(.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce;) /* * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched, @@ -430,7 +430,7 @@ _gcry_aes_cbc_enc_armv8_ce: .align 3 .globl _gcry_aes_cbc_dec_armv8_ce -.type _gcry_aes_cbc_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_dec_armv8_ce,%function;) _gcry_aes_cbc_dec_armv8_ce: /* input: * x0: keysched @@ -515,7 +515,7 @@ _gcry_aes_cbc_dec_armv8_ce: .Lcbc_dec_skip: ret -.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce; +ELF(.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce;) /* @@ -527,7 +527,7 @@ _gcry_aes_cbc_dec_armv8_ce: .align 3 .globl _gcry_aes_ctr_enc_armv8_ce -.type _gcry_aes_ctr_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ctr_enc_armv8_ce,%function;) _gcry_aes_ctr_enc_armv8_ce: /* input: * r0: keysched @@ -669,7 +669,7 @@ _gcry_aes_ctr_enc_armv8_ce: .Lctr_enc_skip: ret -.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce; +ELF(.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce;) /* @@ -681,7 +681,7 @@ _gcry_aes_ctr_enc_armv8_ce: .align 3 .globl _gcry_aes_cfb_enc_armv8_ce -.type _gcry_aes_cfb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_enc_armv8_ce,%function;) _gcry_aes_cfb_enc_armv8_ce: /* input: * r0: keysched @@ -732,7 +732,7 @@ _gcry_aes_cfb_enc_armv8_ce: .Lcfb_enc_skip: ret -.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce; +ELF(.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce;) /* @@ -744,7 +744,7 @@ _gcry_aes_cfb_enc_armv8_ce: .align 3 .globl _gcry_aes_cfb_dec_armv8_ce -.type _gcry_aes_cfb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_dec_armv8_ce,%function;) _gcry_aes_cfb_dec_armv8_ce: /* input: * r0: keysched @@ -829,7 +829,7 @@ _gcry_aes_cfb_dec_armv8_ce: .Lcfb_dec_skip: ret -.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce; +ELF(.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce;) /* @@ -846,7 +846,7 @@ _gcry_aes_cfb_dec_armv8_ce: .align 3 .globl _gcry_aes_ocb_enc_armv8_ce -.type _gcry_aes_ocb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_enc_armv8_ce,%function;) _gcry_aes_ocb_enc_armv8_ce: /* input: * x0: keysched @@ -979,7 +979,7 @@ _gcry_aes_ocb_enc_armv8_ce: CLEAR_REG(v16) ret -.size 
_gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce; +ELF(.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce;) /* @@ -996,7 +996,7 @@ _gcry_aes_ocb_enc_armv8_ce: .align 3 .globl _gcry_aes_ocb_dec_armv8_ce -.type _gcry_aes_ocb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_dec_armv8_ce,%function;) _gcry_aes_ocb_dec_armv8_ce: /* input: * x0: keysched @@ -1129,7 +1129,7 @@ _gcry_aes_ocb_dec_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce; +ELF(.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce;) /* @@ -1145,7 +1145,7 @@ _gcry_aes_ocb_dec_armv8_ce: .align 3 .globl _gcry_aes_ocb_auth_armv8_ce -.type _gcry_aes_ocb_auth_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_auth_armv8_ce,%function;) _gcry_aes_ocb_auth_armv8_ce: /* input: * x0: keysched @@ -1273,7 +1273,7 @@ _gcry_aes_ocb_auth_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce; +ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;) /* @@ -1285,7 +1285,7 @@ _gcry_aes_ocb_auth_armv8_ce: .align 3 .globl _gcry_aes_xts_enc_armv8_ce -.type _gcry_aes_xts_enc_armv8_ce,%function; +ELF(.type _gcry_aes_xts_enc_armv8_ce,%function;) _gcry_aes_xts_enc_armv8_ce: /* input: * r0: keysched @@ -1410,7 +1410,7 @@ _gcry_aes_xts_enc_armv8_ce: .Lxts_enc_skip: ret -.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce; +ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;) /* @@ -1422,7 +1422,7 @@ _gcry_aes_xts_enc_armv8_ce: .align 3 .globl _gcry_aes_xts_dec_armv8_ce -.type _gcry_aes_xts_dec_armv8_ce,%function; +ELF(.type _gcry_aes_xts_dec_armv8_ce,%function;) _gcry_aes_xts_dec_armv8_ce: /* input: * r0: keysched @@ -1547,7 +1547,7 @@ _gcry_aes_xts_dec_armv8_ce: .Lxts_dec_skip: ret -.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce; +ELF(.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;) /* @@ -1555,7 +1555,7 @@ _gcry_aes_xts_dec_armv8_ce: */ .align 3 .globl _gcry_aes_sbox4_armv8_ce -.type _gcry_aes_sbox4_armv8_ce,%function; +ELF(.type _gcry_aes_sbox4_armv8_ce,%function;) _gcry_aes_sbox4_armv8_ce: /* See "Gouv?a, C. P. L. & L?pez, J. Implementing GCM on ARMv8. Topics in * Cryptology ? CT-RSA 2015" for details. @@ -1568,7 +1568,7 @@ _gcry_aes_sbox4_armv8_ce: mov w0, v0.S[0] CLEAR_REG(v0) ret -.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce; +ELF(.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;) /* @@ -1576,13 +1576,13 @@ _gcry_aes_sbox4_armv8_ce: */ .align 3 .globl _gcry_aes_invmixcol_armv8_ce -.type _gcry_aes_invmixcol_armv8_ce,%function; +ELF(.type _gcry_aes_invmixcol_armv8_ce,%function;) _gcry_aes_invmixcol_armv8_ce: ld1 {v0.16b}, [x1] aesimc v0.16b, v0.16b st1 {v0.16b}, [x0] CLEAR_REG(v0) ret -.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce; +ELF(.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;) #endif diff --git a/cipher/sha1-armv8-aarch64-ce.S b/cipher/sha1-armv8-aarch64-ce.S index ec1810d..aeb67a1 100644 --- a/cipher/sha1-armv8-aarch64-ce.S +++ b/cipher/sha1-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -103,7 +103,7 @@ gcry_sha1_aarch64_ce_K_VEC: */ .align 3 .globl _gcry_sha1_transform_armv8_ce -.type _gcry_sha1_transform_armv8_ce,%function; +ELF(.type _gcry_sha1_transform_armv8_ce,%function;) _gcry_sha1_transform_armv8_ce: /* input: * x0: ctx, CTX @@ -199,6 +199,6 @@ _gcry_sha1_transform_armv8_ce: .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce; +ELF(.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce;) #endif diff --git a/cipher/sha256-armv8-aarch64-ce.S b/cipher/sha256-armv8-aarch64-ce.S index a4575da..6b3ad32 100644 --- a/cipher/sha256-armv8-aarch64-ce.S +++ b/cipher/sha256-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -113,7 +113,7 @@ gcry_sha256_aarch64_ce_K: */ .align 3 .globl _gcry_sha256_transform_armv8_ce -.type _gcry_sha256_transform_armv8_ce,%function; +ELF(.type _gcry_sha256_transform_armv8_ce,%function;) _gcry_sha256_transform_armv8_ce: /* input: * r0: ctx, CTX @@ -213,6 +213,6 @@ _gcry_sha256_transform_armv8_ce: .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce; +ELF(.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce;) #endif diff --git a/cipher/twofish-aarch64.S b/cipher/twofish-aarch64.S index 99c4675..adee412 100644 --- a/cipher/twofish-aarch64.S +++ b/cipher/twofish-aarch64.S @@ -18,7 +18,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -217,7 +217,7 @@ ror1(RD); .globl _gcry_twofish_arm_encrypt_block -.type _gcry_twofish_arm_encrypt_block,%function; +ELF(.type _gcry_twofish_arm_encrypt_block,%function;) _gcry_twofish_arm_encrypt_block: /* input: @@ -263,10 +263,10 @@ _gcry_twofish_arm_encrypt_block: ret; .ltorg -.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block; +ELF(.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;) .globl _gcry_twofish_arm_decrypt_block -.type _gcry_twofish_arm_decrypt_block,%function; +ELF(.type _gcry_twofish_arm_decrypt_block,%function;) _gcry_twofish_arm_decrypt_block: /* input: @@ -311,7 +311,7 @@ _gcry_twofish_arm_decrypt_block: str_output_le(RDST, RA, RB, RC, RD, RT0, RT1); ret; -.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block; +ELF(.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/configure.ac b/configure.ac index b5d7211..330485f 100644 --- a/configure.ac +++ b/configure.ac @@ -1119,10 +1119,6 @@ AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly i "eor x0, x0, x30, ror #12;\n\t" "add x0, x0, x30, asr #12;\n\t" "eor v0.16b, v0.16b, v31.16b;\n\t" - - /* Test if '.type' and '.size' are supported. 
*/ - ".size asmfunc,.-asmfunc;\n\t" - ".type asmfunc, at function;\n\t" );]])], [gcry_cv_gcc_aarch64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then -- 2.7.4 From martin at martin.st Thu Mar 22 09:56:11 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 10:56:11 +0200 Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 Message-ID: <1521708975-30902-1-git-send-email-martin@martin.st> This fixes building this file for windows on aarch64. Signed-off-by: Martin Storsj? --- random/rndw32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/random/rndw32.c b/random/rndw32.c index 7e9ac50..08a8867 100644 --- a/random/rndw32.c +++ b/random/rndw32.c @@ -986,7 +986,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, On AMD64, TSC is always available and intrinsic is provided for accessing it. */ -#ifdef __WIN64__ +#ifdef __x86_64__ { unsigned __int64 aint64; @@ -1024,7 +1024,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, (*add) (&aword, sizeof (aword), origin ); } } -#endif /*__WIN64__*/ +#endif /*__x86_64__*/ } -- 2.7.4 From martin at martin.st Thu Mar 22 09:56:13 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 10:56:13 +0200 Subject: [PATCH 3/5] aarch64: Fix assembling chacha20-aarch64.S with clang/llvm In-Reply-To: <1521708975-30902-1-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: <1521708975-30902-3-git-send-email-martin@martin.st> When referring to a specific lane, one doesn't need to specify the total number of lanes of the register. With GNU binutils, both forms are accepted, while clang/llvm rejects the form with the unnecessary number of lanes. Signed-off-by: Martin Storsj? 
--- cipher/chacha20-aarch64.S | 60 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 739ddde..5990a08 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -170,27 +170,27 @@ _gcry_chacha20_aarch64_blocks4: mov ROUND, #20; ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS]; - dup X12.4s, X15.4s[0]; - dup X13.4s, X15.4s[1]; + dup X12.4s, X15.s[0]; + dup X13.4s, X15.s[1]; ldr CTR, [INPUT_CTR]; add X12.4s, X12.4s, VCTR.4s; - dup X0.4s, VTMP1.4s[0]; - dup X1.4s, VTMP1.4s[1]; - dup X2.4s, VTMP1.4s[2]; - dup X3.4s, VTMP1.4s[3]; - dup X14.4s, X15.4s[2]; + dup X0.4s, VTMP1.s[0]; + dup X1.4s, VTMP1.s[1]; + dup X2.4s, VTMP1.s[2]; + dup X3.4s, VTMP1.s[3]; + dup X14.4s, X15.s[2]; cmhi VTMP0.4s, VCTR.4s, X12.4s; - dup X15.4s, X15.4s[3]; + dup X15.4s, X15.s[3]; add CTR, CTR, #4; /* Update counter */ - dup X4.4s, VTMP2.4s[0]; - dup X5.4s, VTMP2.4s[1]; - dup X6.4s, VTMP2.4s[2]; - dup X7.4s, VTMP2.4s[3]; + dup X4.4s, VTMP2.s[0]; + dup X5.4s, VTMP2.s[1]; + dup X6.4s, VTMP2.s[2]; + dup X7.4s, VTMP2.s[3]; sub X13.4s, X13.4s, VTMP0.4s; - dup X8.4s, VTMP3.4s[0]; - dup X9.4s, VTMP3.4s[1]; - dup X10.4s, VTMP3.4s[2]; - dup X11.4s, VTMP3.4s[3]; + dup X8.4s, VTMP3.s[0]; + dup X9.4s, VTMP3.s[1]; + dup X10.4s, VTMP3.s[2]; + dup X11.4s, VTMP3.s[3]; mov X12_TMP.16b, X12.16b; mov X13_TMP.16b, X13.16b; str CTR, [INPUT_CTR]; @@ -208,19 +208,19 @@ _gcry_chacha20_aarch64_blocks4: PLUS(X12, X12_TMP); /* INPUT + 12 * 4 + counter */ PLUS(X13, X13_TMP); /* INPUT + 13 * 4 + counter */ - dup VTMP2.4s, VTMP0.4s[0]; /* INPUT + 0 * 4 */ - dup VTMP3.4s, VTMP0.4s[1]; /* INPUT + 1 * 4 */ - dup X12_TMP.4s, VTMP0.4s[2]; /* INPUT + 2 * 4 */ - dup X13_TMP.4s, VTMP0.4s[3]; /* INPUT + 3 * 4 */ + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */ PLUS(X0, VTMP2); PLUS(X1, VTMP3); PLUS(X2, X12_TMP); PLUS(X3, X13_TMP); - dup VTMP2.4s, VTMP1.4s[0]; /* INPUT + 4 * 4 */ - dup VTMP3.4s, VTMP1.4s[1]; /* INPUT + 5 * 4 */ - dup X12_TMP.4s, VTMP1.4s[2]; /* INPUT + 6 * 4 */ - dup X13_TMP.4s, VTMP1.4s[3]; /* INPUT + 7 * 4 */ + dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */ + dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */ + dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */ + dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */ ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS]; mov INPUT_POS, INPUT; PLUS(X4, VTMP2); @@ -228,12 +228,12 @@ _gcry_chacha20_aarch64_blocks4: PLUS(X6, X12_TMP); PLUS(X7, X13_TMP); - dup VTMP2.4s, VTMP0.4s[0]; /* INPUT + 8 * 4 */ - dup VTMP3.4s, VTMP0.4s[1]; /* INPUT + 9 * 4 */ - dup X12_TMP.4s, VTMP0.4s[2]; /* INPUT + 10 * 4 */ - dup X13_TMP.4s, VTMP0.4s[3]; /* INPUT + 11 * 4 */ - dup VTMP0.4s, VTMP1.4s[2]; /* INPUT + 14 * 4 */ - dup VTMP1.4s, VTMP1.4s[3]; /* INPUT + 15 * 4 */ + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */ + dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */ + dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */ PLUS(X8, VTMP2); PLUS(X9, VTMP3); PLUS(X10, X12_TMP); -- 2.7.4 From martin at martin.st Thu Mar 22 09:56:12 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 10:56:12 +0200 Subject: [PATCH 2/5] aarch64: mpi: Fix building the mpi aarch64 assembly for windows In-Reply-To: 
<1521708975-30902-1-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: <1521708975-30902-2-git-send-email-martin@martin.st> The mpi aarch64 assembly is enabled as soon as the compiler supports inline assembly, without checking for .type and .size, as is done for the rest of the assembly in cipher/*.S. (The .type and .size directives are only supported on ELF.) Signed-off-by: Martin Storsj? --- mpi/aarch64/mpih-add1.S | 5 +++-- mpi/aarch64/mpih-mul1.S | 5 +++-- mpi/aarch64/mpih-mul2.S | 5 +++-- mpi/aarch64/mpih-mul3.S | 5 +++-- mpi/aarch64/mpih-sub1.S | 5 +++-- mpi/asm-common-aarch64.h | 30 ++++++++++++++++++++++++++++++ 6 files changed, 45 insertions(+), 10 deletions(-) create mode 100644 mpi/asm-common-aarch64.h diff --git a/mpi/aarch64/mpih-add1.S b/mpi/aarch64/mpih-add1.S index fa8cd01..4ead1c2 100644 --- a/mpi/aarch64/mpih-add1.S +++ b/mpi/aarch64/mpih-add1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_add_n -.type _gcry_mpih_add_n,%function +ELF(.type _gcry_mpih_add_n,%function) _gcry_mpih_add_n: and x5, x3, #3; adds xzr, xzr, xzr; /* clear carry flag */ @@ -68,4 +69,4 @@ _gcry_mpih_add_n: .Lend: adc x0, xzr, xzr; ret; -.size _gcry_mpih_add_n,.-_gcry_mpih_add_n; +ELF(.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;) diff --git a/mpi/aarch64/mpih-mul1.S b/mpi/aarch64/mpih-mul1.S index 65e98fe..8a86269 100644 --- a/mpi/aarch64/mpih-mul1.S +++ b/mpi/aarch64/mpih-mul1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_mul_1 -.type _gcry_mpih_mul_1,%function +ELF(.type _gcry_mpih_mul_1,%function) _gcry_mpih_mul_1: and x5, x2, #3; mov x4, xzr; @@ -93,4 +94,4 @@ _gcry_mpih_mul_1: .Lend: mov x0, x4; ret; -.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1; +ELF(.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;) diff --git a/mpi/aarch64/mpih-mul2.S b/mpi/aarch64/mpih-mul2.S index bd3b2c9..c7c08e5 100644 --- a/mpi/aarch64/mpih-mul2.S +++ b/mpi/aarch64/mpih-mul2.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_addmul_1 -.type _gcry_mpih_addmul_1,%function +ELF(.type _gcry_mpih_addmul_1,%function) _gcry_mpih_addmul_1: and x5, x2, #3; mov x6, xzr; @@ -105,4 +106,4 @@ _gcry_mpih_addmul_1: .Lend: mov x0, x6; ret; -.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1; +ELF(.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;) diff --git a/mpi/aarch64/mpih-mul3.S b/mpi/aarch64/mpih-mul3.S index a58bc53..ccc961e 100644 --- a/mpi/aarch64/mpih-mul3.S +++ b/mpi/aarch64/mpih-mul3.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_submul_1 -.type _gcry_mpih_submul_1,%function +ELF(.type _gcry_mpih_submul_1,%function) _gcry_mpih_submul_1: and x5, x2, #3; mov x7, xzr; @@ -118,4 +119,4 @@ _gcry_mpih_submul_1: .Loop_end: cinc x0, x7, cc; ret; -.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1; +ELF(.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;) diff --git a/mpi/aarch64/mpih-sub1.S b/mpi/aarch64/mpih-sub1.S index cbf2f08..4a66373 100644 --- a/mpi/aarch64/mpih-sub1.S +++ b/mpi/aarch64/mpih-sub1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" 
/******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_sub_n -.type _gcry_mpih_sub_n,%function +ELF(.type _gcry_mpih_sub_n,%function) _gcry_mpih_sub_n: and x5, x3, #3; subs xzr, xzr, xzr; /* prepare carry flag for sub */ @@ -68,4 +69,4 @@ _gcry_mpih_sub_n: .Lend: cset x0, cc; ret; -.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n; +ELF(.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;) diff --git a/mpi/asm-common-aarch64.h b/mpi/asm-common-aarch64.h new file mode 100644 index 0000000..1f646cb --- /dev/null +++ b/mpi/asm-common-aarch64.h @@ -0,0 +1,30 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsj? + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef GCRY_ASM_COMMON_AARCH64_H +#define GCRY_ASM_COMMON_AARCH64_H + +#ifdef __ELF__ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* GCRY_ASM_COMMON_AARCH64_H */ -- 2.7.4 From cvs at cvs.gnupg.org Thu Mar 22 15:35:41 2018 From: cvs at cvs.gnupg.org (by Werner Koch) Date: Thu, 22 Mar 2018 15:35:41 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-46-g92fd86e Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 92fd86e9956ef3fea51d72495fd0da09522e57a1 (commit) from 0b3ec359e2279c3b46b171372b1b7733bba20cd7 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 92fd86e9956ef3fea51d72495fd0da09522e57a1 Author: Werner Koch Date: Thu Mar 22 15:28:04 2018 +0100 doc: Clarify the value range of the use-rsa-e parameter. -- Signed-off-by: Werner Koch diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi index bba07a4..967745f 100644 --- a/doc/gcrypt.texi +++ b/doc/gcrypt.texi @@ -2905,7 +2905,9 @@ Use the given value. @noindent If this parameter is not used, Libgcrypt uses for historic reasons -65537. +65537. Note that the value must fit into a 32 bit unsigned variable +and that the usual C prefixes are considered (e.g. 017 gives 15). + @item qbits @var{n} This is only meanigful for DSA keys. If it is given the DSA key is diff --git a/src/sexp.c b/src/sexp.c index 0462d92..9d89268 100644 --- a/src/sexp.c +++ b/src/sexp.c @@ -401,7 +401,7 @@ _gcry_sexp_vlist( const gcry_sexp_t a, ... 
) /**************** * Append n to the list a - * Returns: a new ist (which maybe a) + * Returns: a new list (which maybe a) */ gcry_sexp_t _gcry_sexp_append( const gcry_sexp_t a, const gcry_sexp_t n ) ----------------------------------------------------------------------- Summary of changes: doc/gcrypt.texi | 4 +++- src/sexp.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From jussi.kivilinna at iki.fi Thu Mar 22 20:10:40 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 22 Mar 2018 21:10:40 +0200 Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: <1521708975-30902-1-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: Hello, On 22.03.2018 10:56, Martin Storsj? wrote: > This fixes building this file for windows on aarch64. > > Signed-off-by: Martin Storsj? Thanks for the patches. There is few generic things that needs to be done before these can be applied: - Check signing off DCO part in doc/HACKING - Add ChangeLog entries to beginning of commit log, see existing commit messages for example. I'm interested in testing these patches myself. Can you give some pointers for how to cross-compile libgcrypt for windows/aarch64? -Jussi > --- > random/rndw32.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/random/rndw32.c b/random/rndw32.c > index 7e9ac50..08a8867 100644 > --- a/random/rndw32.c > +++ b/random/rndw32.c > @@ -986,7 +986,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, > > On AMD64, TSC is always available and intrinsic is provided for accessing > it. */ > -#ifdef __WIN64__ > +#ifdef __x86_64__ > { > unsigned __int64 aint64; > > @@ -1024,7 +1024,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, > (*add) (&aword, sizeof (aword), origin ); > } > } > -#endif /*__WIN64__*/ > +#endif /*__x86_64__*/ > > > } > From jussi.kivilinna at iki.fi Thu Mar 22 20:18:29 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 22 Mar 2018 21:18:29 +0200 Subject: [PATCH 5/5] aarch64: Enable building the aarch64 cipher assembly for windows In-Reply-To: <1521708975-30902-5-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> <1521708975-30902-5-git-send-email-martin@martin.st> Message-ID: <89faffe8-4964-e29f-8a69-0696b6ce93cc@iki.fi> Hello, On 22.03.2018 10:56, Martin Storsj? wrote: > Don't require .type and .size in configure; we can make > them optional via a preprocessor macro. > > This is mostly a mechanical change, wrapping the .type and .size > directives in an ELF() macro, with two actual manual changes > (when targeting windows): > - Don't load global symbols via a GOT (in chacha20) > - Don't use the x18 register (in camellia); back up and restore x19 > in the prologue/epilogue and use that instead (on windows only). > > x18 is a platform specific register; on linux, it's free to be used > by user code, while it's reserved for platform use on windows and > darwin. > ...snip... > > +#ifdef _WIN32 > +# define WIN(...) __VA_ARGS__ > +#else > +# define WIN(...) 
/*_*/ > +#endif > + > .text > > /* struct camellia_ctx: */ > @@ -55,12 +61,21 @@ > #define RT0 w15 > #define RT1 w16 > #define RT2 w17 > +#ifdef _WIN32 > +#define RT3 w19 > +#else > #define RT3 w18 > +#endif > > #define xRT0 x15 > #define xRT1 x16 > #define xRT2 x17 > +#ifdef _WIN32 > +/* w18/x18 is reserved and can't be used on windows. */ > +#define xRT3 x19 > +#else > #define xRT3 x18 > +#endif > Now that I know x18 is reserved for a special purpose, I'd actually prefer that x18 be switched to x19 on linux too. Then there would be no need for the _WIN32 ifdefs and the WIN() macro. -Jussi From jussi.kivilinna at iki.fi Thu Mar 22 20:19:48 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 22 Mar 2018 21:19:48 +0200 Subject: [PATCH 4/5] aarch64: camellia: Only use the lower 32 bit of an int parameter In-Reply-To: <1521708975-30902-4-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> <1521708975-30902-4-git-send-email-martin@martin.st> Message-ID: Hello, On 22.03.2018 10:56, Martin Storsjö wrote: > The keybits parameter is declared as int, and in those cases, the > upper half of a register is undefined, not guaranteed to be zero. > > Signed-off-by: Martin Storsjö > --- > I didn't check other files and functions for the same issue, I > just happened to notice this one while looking closer at the > camellia source file. > > From previous experience, clang can be pretty aggressive with > passing in undefined data in the upper half of registers, where > it isn't supposed to make any difference. I'll review the other aarch64 implementations for this issue. -Jussi From cvs at cvs.gnupg.org Thu Mar 22 20:50:40 2018 From: cvs at cvs.gnupg.org (by Jussi Kivilinna) Date: Thu, 22 Mar 2018 20:50:40 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-49-g617f5e7 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 617f5e746f8295cc36d1002c8c53edc95d04d0f6 (commit) via 3841b23c0ccb24d555b7570083bba958e3126d26 (commit) via a1127dbbada4302abf09eec90fbaceca87bfcdf0 (commit) from 92fd86e9956ef3fea51d72495fd0da09522e57a1 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 617f5e746f8295cc36d1002c8c53edc95d04d0f6 Author: Jussi Kivilinna Date: Thu Mar 22 21:42:23 2018 +0200 bench-slope: add CPU frequency auto-detection * tests/bench-slope.c (bench_obj): Add 'hd'. (bench_encrypt_init, bench_encrypt_free, bench_encrypt_do_bench) (bench_decrypt_do_bench, bench_xts_encrypt_init) (bench_xts_encrypt_do_bench, bench_xts_decrypt_do_bench) (bench_ccm_encrypt_init, bench_ccm_encrypt_do_bench) (bench_ccm_decrypt_do_bench, bench_aead_encrypt_init) (bench_aead_encrypt_do_bench, bench_aead_decrypt_do_bench) (bench_hash_init, bench_hash_free, bench_hash_do_bench) (bench_mac_init, bench_mac_free, bench_mac_do_bench): Use 'obj->hd' for storing pointer to crypto context. (auto_ghz): New. (do_slope_benchmark): Rename to... (slope_benchmark): ...this. (auto_ghz_init, auto_ghz_free, auto_ghz_bench, auto_ghz_detect_ops) (get_auto_ghz, do_slope_benchmark): New. (double_to_str): Round number larger than 1000 to integer.
(bench_print_result_csv, bench_print_result_std) (bench_print_result, bench_print_header, cipher_bench_one) (hash_bench_one, mac_bench_one, kdf_bench_one, kdf_bench): Add auto-detected frequency printing. (print_help): Help for CPU speed auto-detection mode. (main): Add parsing for "--cpu-mhz auto". -- Signed-off-by: Jussi Kivilinna diff --git a/tests/bench-slope.c b/tests/bench-slope.c index e34104f..5c64f22 100644 --- a/tests/bench-slope.c +++ b/tests/bench-slope.c @@ -50,6 +50,9 @@ static int num_measurement_repetitions; results. */ static double cpu_ghz = -1; +/* Attempt to autodetect CPU Ghz. */ +static int auto_ghz; + /* Whether we are running as part of the regression test suite. */ static int in_regression_test; @@ -220,6 +223,7 @@ struct bench_obj unsigned int step_size; void *priv; + void *hd; }; typedef int (*const bench_initialize_t) (struct bench_obj * obj); @@ -383,7 +387,7 @@ adjust_loop_iterations_to_timer_accuracy (struct bench_obj *obj, void *buffer, /* Benchmark and return linear regression slope in nanoseconds per byte. */ double -do_slope_benchmark (struct bench_obj *obj) +slope_benchmark (struct bench_obj *obj) { unsigned int num_measurements; double *measurements = NULL; @@ -464,6 +468,122 @@ err_free: return -1; } +/********************************************* CPU frequency auto-detection. */ + +static int +auto_ghz_init (struct bench_obj *obj) +{ + obj->min_bufsize = 16; + obj->max_bufsize = 64 + obj->min_bufsize; + obj->step_size = 8; + obj->num_measure_repetitions = 16; + + return 0; +} + +static void +auto_ghz_free (struct bench_obj *obj) +{ + (void)obj; +} + +static void +auto_ghz_bench (struct bench_obj *obj, void *buf, size_t buflen) +{ + (void)obj; + (void)buf; + + buflen *= 1024; + + /* Turbo frequency detection benchmark. Without CPU turbo-boost, this + * function will give cycles/iteration result 1024.0 on high-end CPUs. + * With turbo, result will be less and can be used detect turbo-clock. */ + + do + { +#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY + /* Use memory barrier to prevent compiler from optimizing this loop + * away. */ + + asm volatile ("":::"memory"); +#else + /* TODO: Needs alternative way. */ +#endif + } + while (--buflen); +} + +static struct bench_ops auto_ghz_detect_ops = { + &auto_ghz_init, + &auto_ghz_free, + &auto_ghz_bench +}; + + +double +get_auto_ghz (void) +{ + struct bench_obj obj = { 0 }; + double nsecs_per_iteration; + double cycles_per_iteration; + + obj.ops = &auto_ghz_detect_ops; + + nsecs_per_iteration = slope_benchmark (&obj); + + cycles_per_iteration = nsecs_per_iteration * cpu_ghz; + + /* Adjust CPU Ghz so that cycles per iteration would give '1024.0'. */ + + return cpu_ghz * 1024 / cycles_per_iteration; +} + + +double +do_slope_benchmark (struct bench_obj *obj, double *bench_ghz) +{ + double ret; + + if (!auto_ghz) + { + /* Perform measurement without autodetection of CPU frequency. */ + + ret = slope_benchmark (obj); + + *bench_ghz = cpu_ghz; + } + else + { + double cpu_auto_ghz_before; + double cpu_auto_ghz_after; + double nsecs_per_iteration; + double diff; + + /* Perform measurement with CPU frequency autodetection. */ + + do + { + /* Repeat measurement until CPU turbo frequency has stabilized. */ + + cpu_auto_ghz_before = get_auto_ghz (); + + nsecs_per_iteration = slope_benchmark (obj); + + cpu_auto_ghz_after = get_auto_ghz (); + + diff = 1.0 - (cpu_auto_ghz_before / cpu_auto_ghz_after); + diff = diff < 0 ? 
-diff : diff; + } + while (diff > 5e-5); + + ret = nsecs_per_iteration; + + *bench_ghz = cpu_auto_ghz_after; + } + + return ret; +} + /********************************************************** Printing results. */ @@ -476,29 +596,34 @@ double_to_str (char *out, size_t outlen, double value) fmt = "%.3f"; else if (value < 100.0) fmt = "%.2f"; - else + else if (value < 1000.0) fmt = "%.1f"; + else + fmt = "%.0f"; snprintf (out, outlen, fmt, value); } static void -bench_print_result_csv (double nsecs_per_byte) +bench_print_result_csv (double nsecs_per_byte, double bench_ghz) { double cycles_per_byte, mbytes_per_sec; char nsecpbyte_buf[16]; char mbpsec_buf[16]; char cpbyte_buf[16]; + char mhz_buf[16]; *cpbyte_buf = 0; + *mhz_buf = 0; double_to_str (nsecpbyte_buf, sizeof (nsecpbyte_buf), nsecs_per_byte); /* If user didn't provide CPU speed, we cannot show cycles/byte results. */ - if (cpu_ghz > 0.0) + if (bench_ghz > 0.0) { - cycles_per_byte = nsecs_per_byte * cpu_ghz; + cycles_per_byte = nsecs_per_byte * bench_ghz; double_to_str (cpbyte_buf, sizeof (cpbyte_buf), cycles_per_byte); + double_to_str (mhz_buf, sizeof (mhz_buf), bench_ghz * 1000); } mbytes_per_sec = @@ -506,50 +631,76 @@ bench_print_result_csv (double nsecs_per_byte) double_to_str (mbpsec_buf, sizeof (mbpsec_buf), mbytes_per_sec); /* We print two empty fields to allow for future enhancements. */ - printf ("%s,%s,%s,,,%s,ns/B,%s,MiB/s,%s,c/B\n", - current_section_name, - current_algo_name? current_algo_name : "", - current_mode_name? current_mode_name : "", - nsecpbyte_buf, - mbpsec_buf, - cpbyte_buf); - + if (auto_ghz) + { + printf ("%s,%s,%s,,,%s,ns/B,%s,MiB/s,%s,c/B,%s,Mhz\n", + current_section_name, + current_algo_name? current_algo_name : "", + current_mode_name? current_mode_name : "", + nsecpbyte_buf, + mbpsec_buf, + cpbyte_buf, + mhz_buf); + } + else + { + printf ("%s,%s,%s,,,%s,ns/B,%s,MiB/s,%s,c/B\n", + current_section_name, + current_algo_name? current_algo_name : "", + current_mode_name? current_mode_name : "", + nsecpbyte_buf, + mbpsec_buf, + cpbyte_buf); + } } static void -bench_print_result_std (double nsecs_per_byte) +bench_print_result_std (double nsecs_per_byte, double bench_ghz) { double cycles_per_byte, mbytes_per_sec; char nsecpbyte_buf[16]; char mbpsec_buf[16]; char cpbyte_buf[16]; + char mhz_buf[16]; double_to_str (nsecpbyte_buf, sizeof (nsecpbyte_buf), nsecs_per_byte); /* If user didn't provide CPU speed, we cannot show cycles/byte results. 
*/ - if (cpu_ghz > 0.0) + if (bench_ghz > 0.0) { - cycles_per_byte = nsecs_per_byte * cpu_ghz; + cycles_per_byte = nsecs_per_byte * bench_ghz; double_to_str (cpbyte_buf, sizeof (cpbyte_buf), cycles_per_byte); + double_to_str (mhz_buf, sizeof (mhz_buf), bench_ghz * 1000); } else - strcpy (cpbyte_buf, "-"); + { + strcpy (cpbyte_buf, "-"); + strcpy (mhz_buf, "-"); + } mbytes_per_sec = (1000.0 * 1000.0 * 1000.0) / (nsecs_per_byte * 1024 * 1024); double_to_str (mbpsec_buf, sizeof (mbpsec_buf), mbytes_per_sec); - printf ("%9s ns/B %9s MiB/s %9s c/B\n", - nsecpbyte_buf, mbpsec_buf, cpbyte_buf); + if (auto_ghz) + { + printf ("%9s ns/B %9s MiB/s %9s c/B %9s\n", + nsecpbyte_buf, mbpsec_buf, cpbyte_buf, mhz_buf); + } + else + { + printf ("%9s ns/B %9s MiB/s %9s c/B\n", + nsecpbyte_buf, mbpsec_buf, cpbyte_buf); + } } static void -bench_print_result (double nsecs_per_byte) +bench_print_result (double nsecs_per_byte, double bench_ghz) { if (csv_mode) - bench_print_result_csv (nsecs_per_byte); + bench_print_result_csv (nsecs_per_byte, bench_ghz); else - bench_print_result_std (nsecs_per_byte); + bench_print_result_std (nsecs_per_byte, bench_ghz); } static void @@ -578,8 +729,13 @@ bench_print_header (int algo_width, const char *algo_name) printf (" %-*s | ", -algo_width, algo_name); else printf (" %-*s | ", algo_width, algo_name); - printf ("%14s %15s %13s\n", "nanosecs/byte", "mebibytes/sec", - "cycles/byte"); + + if (auto_ghz) + printf ("%14s %15s %13s %9s\n", "nanosecs/byte", "mebibytes/sec", + "cycles/byte", "auto Mhz"); + else + printf ("%14s %15s %13s\n", "nanosecs/byte", "mebibytes/sec", + "cycles/byte"); } } @@ -684,7 +840,7 @@ bench_encrypt_init (struct bench_obj *obj) exit (1); } - obj->priv = hd; + obj->hd = hd; return 0; } @@ -692,7 +848,7 @@ bench_encrypt_init (struct bench_obj *obj) static void bench_encrypt_free (struct bench_obj *obj) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; gcry_cipher_close (hd); } @@ -700,7 +856,7 @@ bench_encrypt_free (struct bench_obj *obj) static void bench_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; err = gcry_cipher_encrypt (hd, buf, buflen, buf, buflen); @@ -716,7 +872,7 @@ bench_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) static void bench_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; err = gcry_cipher_decrypt (hd, buf, buflen, buf, buflen); @@ -790,7 +946,7 @@ bench_xts_encrypt_init (struct bench_obj *obj) exit (1); } - obj->priv = hd; + obj->hd = hd; return 0; } @@ -798,7 +954,7 @@ bench_xts_encrypt_init (struct bench_obj *obj) static void bench_xts_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; unsigned int pos; static const char tweak[16] = { 0xff, 0xff, 0xfe, }; size_t sectorlen = obj->step_size; @@ -825,7 +981,7 @@ bench_xts_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) static void bench_xts_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; unsigned int pos; static const char tweak[16] = { 0xff, 0xff, 0xfe, }; size_t sectorlen = obj->step_size; @@ -865,7 +1021,7 @@ static struct bench_ops xts_decrypt_ops = { static void bench_ccm_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - 
gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[8]; char nonce[11] = { 0x80, 0x01, }; @@ -909,7 +1065,7 @@ bench_ccm_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) static void bench_ccm_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[8] = { 0, }; char nonce[11] = { 0x80, 0x01, }; @@ -956,7 +1112,7 @@ static void bench_ccm_authenticate_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[8] = { 0, }; char nonce[11] = { 0x80, 0x01, }; @@ -1030,7 +1186,7 @@ static void bench_aead_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen, const char *nonce, size_t noncelen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[16]; @@ -1060,7 +1216,7 @@ static void bench_aead_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen, const char *nonce, size_t noncelen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[16] = { 0, }; @@ -1093,7 +1249,7 @@ bench_aead_authenticate_do_bench (struct bench_obj *obj, void *buf, size_t buflen, const char *nonce, size_t noncelen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[16] = { 0, }; char data = 0xff; @@ -1360,6 +1516,7 @@ cipher_bench_one (int algo, struct bench_cipher_mode *pmode) struct bench_cipher_mode mode = *pmode; struct bench_obj obj = { 0 }; double result; + double bench_ghz; unsigned int blklen; mode.algo = algo; @@ -1404,9 +1561,9 @@ cipher_bench_one (int algo, struct bench_cipher_mode *pmode) obj.ops = mode.ops; obj.priv = &mode; - result = do_slope_benchmark (&obj); + result = do_slope_benchmark (&obj, &bench_ghz); - bench_print_result (result); + bench_print_result (result, bench_ghz); } @@ -1483,7 +1640,7 @@ bench_hash_init (struct bench_obj *obj) exit (1); } - obj->priv = hd; + obj->hd = hd; return 0; } @@ -1491,7 +1648,7 @@ bench_hash_init (struct bench_obj *obj) static void bench_hash_free (struct bench_obj *obj) { - gcry_md_hd_t hd = obj->priv; + gcry_md_hd_t hd = obj->hd; gcry_md_close (hd); } @@ -1499,7 +1656,7 @@ bench_hash_free (struct bench_obj *obj) static void bench_hash_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_md_hd_t hd = obj->priv; + gcry_md_hd_t hd = obj->hd; gcry_md_reset (hd); gcry_md_write (hd, buf, buflen); @@ -1524,6 +1681,7 @@ hash_bench_one (int algo, struct bench_hash_mode *pmode) { struct bench_hash_mode mode = *pmode; struct bench_obj obj = { 0 }; + double bench_ghz; double result; mode.algo = algo; @@ -1536,9 +1694,9 @@ hash_bench_one (int algo, struct bench_hash_mode *pmode) obj.ops = mode.ops; obj.priv = &mode; - result = do_slope_benchmark (&obj); + result = do_slope_benchmark (&obj, &bench_ghz); - bench_print_result (result); + bench_print_result (result, bench_ghz); } static void @@ -1645,7 +1803,7 @@ bench_mac_init (struct bench_obj *obj) break; } - obj->priv = hd; + obj->hd = hd; free (key); return 0; @@ -1654,7 +1812,7 @@ bench_mac_init (struct bench_obj *obj) static void bench_mac_free (struct bench_obj *obj) { - gcry_mac_hd_t hd = obj->priv; + gcry_mac_hd_t hd = obj->hd; gcry_mac_close (hd); } @@ -1662,7 +1820,7 @@ bench_mac_free (struct bench_obj *obj) static void bench_mac_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_mac_hd_t hd = obj->priv; + gcry_mac_hd_t hd = 
obj->hd; size_t bs; char b; @@ -1690,6 +1848,7 @@ mac_bench_one (int algo, struct bench_mac_mode *pmode) { struct bench_mac_mode mode = *pmode; struct bench_obj obj = { 0 }; + double bench_ghz; double result; mode.algo = algo; @@ -1702,9 +1861,9 @@ mac_bench_one (int algo, struct bench_mac_mode *pmode) obj.ops = mode.ops; obj.priv = &mode; - result = do_slope_benchmark (&obj); + result = do_slope_benchmark (&obj, &bench_ghz); - bench_print_result (result); + bench_print_result (result, bench_ghz); } static void @@ -1807,9 +1966,11 @@ kdf_bench_one (int algo, int subalgo) struct bench_obj obj = { 0 }; double nsecs_per_iteration; double cycles_per_iteration; + double bench_ghz; char algo_name[32]; char nsecpiter_buf[16]; char cpiter_buf[16]; + char mhz_buf[16]; mode.algo = algo; mode.subalgo = subalgo; @@ -1843,31 +2004,45 @@ kdf_bench_one (int algo, int subalgo) obj.ops = mode.ops; obj.priv = &mode; - nsecs_per_iteration = do_slope_benchmark (&obj); + nsecs_per_iteration = do_slope_benchmark (&obj, &bench_ghz); strcpy(cpiter_buf, csv_mode ? "" : "-"); + strcpy(mhz_buf, csv_mode ? "" : "-"); double_to_str (nsecpiter_buf, sizeof (nsecpiter_buf), nsecs_per_iteration); /* If user didn't provide CPU speed, we cannot show cycles/iter results. */ - if (cpu_ghz > 0.0) + if (bench_ghz > 0.0) { - cycles_per_iteration = nsecs_per_iteration * cpu_ghz; + cycles_per_iteration = nsecs_per_iteration * bench_ghz; double_to_str (cpiter_buf, sizeof (cpiter_buf), cycles_per_iteration); + double_to_str (mhz_buf, sizeof (mhz_buf), bench_ghz * 1000); } if (csv_mode) { - printf ("%s,%s,%s,,,,,,,,,%s,ns/iter,%s,c/iter\n", - current_section_name, - current_algo_name ? current_algo_name : "", - current_mode_name ? current_mode_name : "", - nsecpiter_buf, - cpiter_buf); + if (auto_ghz) + printf ("%s,%s,%s,,,,,,,,,%s,ns/iter,%s,c/iter,%s,Mhz\n", + current_section_name, + current_algo_name ? current_algo_name : "", + current_mode_name ? current_mode_name : "", + nsecpiter_buf, + cpiter_buf, + mhz_buf); + else + printf ("%s,%s,%s,,,,,,,,,%s,ns/iter,%s,c/iter\n", + current_section_name, + current_algo_name ? current_algo_name : "", + current_mode_name ? current_mode_name : "", + nsecpiter_buf, + cpiter_buf); } else { - printf ("%14s %13s\n", nsecpiter_buf, cpiter_buf); + if (auto_ghz) + printf ("%14s %13s %9s\n", nsecpiter_buf, cpiter_buf, mhz_buf); + else + printf ("%14s %13s\n", nsecpiter_buf, cpiter_buf); } } @@ -1882,7 +2057,10 @@ kdf_bench (char **argv, int argc) if (!csv_mode) { printf (" %-*s | ", 24, ""); - printf ("%14s %13s\n", "nanosecs/iter", "cycles/iter"); + if (auto_ghz) + printf ("%14s %13s %9s\n", "nanosecs/iter", "cycles/iter", "auto Mhz"); + else + printf ("%14s %13s\n", "nanosecs/iter", "cycles/iter"); } if (argv && argc) @@ -1923,7 +2101,8 @@ print_help (void) "", " options:", " --cpu-mhz Set CPU speed for calculating cycles", - " per bytes results.", + " per bytes results. 
Set as \"auto\"", + " for auto-detection of CPU speed.", " --disable-hwf Disable hardware acceleration feature(s)", " for benchmarking.", " --repetitions Use N repetitions (default " @@ -2039,8 +2218,15 @@ main (int argc, char **argv) argv++; if (argc) { - cpu_ghz = atof (*argv); - cpu_ghz /= 1000; /* Mhz => Ghz */ + if (!strcmp (*argv, "auto")) + { + auto_ghz = 1; + } + else + { + cpu_ghz = atof (*argv); + cpu_ghz /= 1000; /* Mhz => Ghz */ + } argc--; argv++; commit 3841b23c0ccb24d555b7570083bba958e3126d26 Author: Jussi Kivilinna Date: Thu Mar 22 21:42:23 2018 +0200 _gcry_burn_stack: use memset for clearing memory * src/misc.c (__gcry_burn_stack) [HAVE_VLA]: Use 'memset' for clearing stack. -- Patch switches stacking burning to use faster memset instead of wipememory. Memset is accessed through volatile function pointer, so that compiler will not optimize away the call. Signed-off-by: Jussi Kivilinna diff --git a/src/misc.c b/src/misc.c index 002a84f..47d2dc7 100644 --- a/src/misc.c +++ b/src/misc.c @@ -501,11 +501,12 @@ void __gcry_burn_stack (unsigned int bytes) { #ifdef HAVE_VLA + static void *(*volatile memset_ptr)(void *, int, size_t) = (void *)memset; /* (bytes == 0 ? 1 : bytes) == (!bytes + bytes) */ unsigned int buflen = ((!bytes + bytes) + 63) & ~63; - volatile char buf[buflen]; + char buf[buflen]; - wipememory (buf, sizeof buf); + memset_ptr (buf, 0, sizeof buf); #else volatile char buf[64]; commit a1127dbbada4302abf09eec90fbaceca87bfcdf0 Author: Jussi Kivilinna Date: Thu Mar 22 21:42:22 2018 +0200 Improve constant-time buffer compare * cipher/bufhelp.h (buf_eq_const): Rewrite logic. -- New implementation for constant-time buffer comparing that avoids generating conditional code in comparison loop. Signed-off-by: Jussi Kivilinna diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h index b854bc0..83d3f53 100644 --- a/cipher/bufhelp.h +++ b/cipher/bufhelp.h @@ -290,13 +290,19 @@ buf_eq_const(const void *_a, const void *_b, size_t len) { const byte *a = _a; const byte *b = _b; - size_t diff, i; + int ab, ba; + size_t i; /* Constant-time compare. */ - for (i = 0, diff = 0; i < len; i++) - diff -= !!(a[i] - b[i]); + for (i = 0, ab = 0, ba = 0; i < len; i++) + { + /* If a[i] != b[i], either ab or ba will be negative. */ + ab |= a[i] - b[i]; + ba |= b[i] - a[i]; + } - return !diff; + /* 'ab | ba' is negative when buffers are not equal. */ + return (ab | ba) >= 0; } ----------------------------------------------------------------------- Summary of changes: cipher/bufhelp.h | 14 ++- src/misc.c | 5 +- tests/bench-slope.c | 312 +++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 262 insertions(+), 69 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From jussi.kivilinna at iki.fi Thu Mar 22 21:14:57 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 22 Mar 2018 22:14:57 +0200 Subject: [PATCH] tests/aeswrap: add in-place encryption/decryption testing Message-ID: <152174969706.13724.8178957584763301213.stgit@localhost.localdomain> * tests/aeswrap.c (check): Rename to... (check_one): ...this and add in-place testing. (check): New. 
-- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/tests/aeswrap.c b/tests/aeswrap.c index 90add111c..dbbd7dd9a 100644 --- a/tests/aeswrap.c +++ b/tests/aeswrap.c @@ -31,10 +31,11 @@ static void -check (int algo, - const void *kek, size_t keklen, - const void *data, size_t datalen, - const void *expected, size_t expectedlen) +check_one (int algo, + const void *kek, size_t keklen, + const void *data, size_t datalen, + const void *expected, size_t expectedlen, + int inplace) { gcry_error_t err; gcry_cipher_hd_t hd; @@ -57,9 +58,19 @@ check (int algo, outbuflen = datalen + 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, data, datalen); + err = gcry_cipher_encrypt (hd, outbuf, outbuflen, outbuf, datalen); + } else - err = gcry_cipher_encrypt (hd, outbuf, outbuflen, data, datalen); + { + err = gcry_cipher_encrypt (hd, outbuf, outbuflen, data, datalen); + } + if (err) { fail ("gcry_cipher_encrypt failed: %s\n", gpg_strerror (err)); @@ -71,7 +82,7 @@ check (int algo, const unsigned char *s; int i; - fail ("mismatch at encryption!\n"); + fail ("mismatch at encryption!%s\n", inplace ? " (inplace)" : ""); fprintf (stderr, "computed: "); for (i = 0; i < outbuflen; i++) fprintf (stderr, "%02x ", outbuf[i]); @@ -84,9 +95,19 @@ check (int algo, outbuflen = expectedlen - 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, expected, expectedlen); + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, outbuf, expectedlen); + } else - err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + { + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + } + if (err) { fail ("gcry_cipher_decrypt failed: %s\n", gpg_strerror (err)); @@ -98,7 +119,7 @@ check (int algo, const unsigned char *s; int i; - fail ("mismatch at decryption!\n"); + fail ("mismatch at decryption!%s\n", inplace ? " (inplace)" : ""); fprintf (stderr, "computed: "); for (i = 0; i < outbuflen; i++) fprintf (stderr, "%02x ", outbuf[i]); @@ -113,9 +134,19 @@ check (int algo, outbuflen = expectedlen - 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, expected, expectedlen); + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, outbuf, expectedlen); + } else - err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + { + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + } + if (err) { fail ("gcry_cipher_decrypt(2) failed: %s\n", gpg_strerror (err)); @@ -123,14 +154,24 @@ check (int algo, } if (outbuflen != datalen || memcmp (outbuf, data, datalen)) - fail ("mismatch at decryption(2)!\n"); + fail ("mismatch at decryption(2)!%s\n", inplace ? " (inplace)" : ""); - /* And once ore without a key reset. */ + /* And once more without a key reset. 
*/ outbuflen = expectedlen - 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, expected, expectedlen); + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, outbuf, expectedlen); + } else - err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + { + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + } + if (err) { fail ("gcry_cipher_decrypt(3) failed: %s\n", gpg_strerror (err)); @@ -138,12 +179,23 @@ check (int algo, } if (outbuflen != datalen || memcmp (outbuf, data, datalen)) - fail ("mismatch at decryption(3)!\n"); + fail ("mismatch at decryption(3)!%s\n", inplace ? " (inplace)" : ""); gcry_cipher_close (hd); } +static void +check (int algo, + const void *kek, size_t keklen, + const void *data, size_t datalen, + const void *expected, size_t expectedlen) +{ + check_one (algo, kek, keklen, data, datalen, expected, expectedlen, 0); + check_one (algo, kek, keklen, data, datalen, expected, expectedlen, 1); +} + + static void check_all (void) { From cvs at cvs.gnupg.org Thu Mar 22 21:24:03 2018 From: cvs at cvs.gnupg.org (by Jussi Kivilinna) Date: Thu, 22 Mar 2018 21:24:03 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-51-g885f031 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 885f031fbd17abc1c0fedbb98df22823b647fc11 (commit) via 330ec66e0babdabb658dc7d6db78f37b2a1b996e (commit) from 617f5e746f8295cc36d1002c8c53edc95d04d0f6 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 885f031fbd17abc1c0fedbb98df22823b647fc11 Author: Jussi Kivilinna Date: Thu Mar 22 21:54:20 2018 +0200 tests/aeswrap: add in-place encryption/decryption testing * tests/aeswrap.c (check): Rename to... (check_one): ...this and add in-place testing. (check): New. -- Signed-off-by: Jussi Kivilinna diff --git a/tests/aeswrap.c b/tests/aeswrap.c index 90add11..dbbd7dd 100644 --- a/tests/aeswrap.c +++ b/tests/aeswrap.c @@ -31,10 +31,11 @@ static void -check (int algo, - const void *kek, size_t keklen, - const void *data, size_t datalen, - const void *expected, size_t expectedlen) +check_one (int algo, + const void *kek, size_t keklen, + const void *data, size_t datalen, + const void *expected, size_t expectedlen, + int inplace) { gcry_error_t err; gcry_cipher_hd_t hd; @@ -57,9 +58,19 @@ check (int algo, outbuflen = datalen + 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, data, datalen); + err = gcry_cipher_encrypt (hd, outbuf, outbuflen, outbuf, datalen); + } else - err = gcry_cipher_encrypt (hd, outbuf, outbuflen, data, datalen); + { + err = gcry_cipher_encrypt (hd, outbuf, outbuflen, data, datalen); + } + if (err) { fail ("gcry_cipher_encrypt failed: %s\n", gpg_strerror (err)); @@ -71,7 +82,7 @@ check (int algo, const unsigned char *s; int i; - fail ("mismatch at encryption!\n"); + fail ("mismatch at encryption!%s\n", inplace ? 
" (inplace)" : ""); fprintf (stderr, "computed: "); for (i = 0; i < outbuflen; i++) fprintf (stderr, "%02x ", outbuf[i]); @@ -84,9 +95,19 @@ check (int algo, outbuflen = expectedlen - 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, expected, expectedlen); + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, outbuf, expectedlen); + } else - err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + { + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + } + if (err) { fail ("gcry_cipher_decrypt failed: %s\n", gpg_strerror (err)); @@ -98,7 +119,7 @@ check (int algo, const unsigned char *s; int i; - fail ("mismatch at decryption!\n"); + fail ("mismatch at decryption!%s\n", inplace ? " (inplace)" : ""); fprintf (stderr, "computed: "); for (i = 0; i < outbuflen; i++) fprintf (stderr, "%02x ", outbuf[i]); @@ -113,9 +134,19 @@ check (int algo, outbuflen = expectedlen - 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, expected, expectedlen); + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, outbuf, expectedlen); + } else - err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + { + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + } + if (err) { fail ("gcry_cipher_decrypt(2) failed: %s\n", gpg_strerror (err)); @@ -123,14 +154,24 @@ check (int algo, } if (outbuflen != datalen || memcmp (outbuf, data, datalen)) - fail ("mismatch at decryption(2)!\n"); + fail ("mismatch at decryption(2)!%s\n", inplace ? " (inplace)" : ""); - /* And once ore without a key reset. */ + /* And once more without a key reset. */ outbuflen = expectedlen - 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, expected, expectedlen); + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, outbuf, expectedlen); + } else - err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + { + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + } + if (err) { fail ("gcry_cipher_decrypt(3) failed: %s\n", gpg_strerror (err)); @@ -138,13 +179,24 @@ check (int algo, } if (outbuflen != datalen || memcmp (outbuf, data, datalen)) - fail ("mismatch at decryption(3)!\n"); + fail ("mismatch at decryption(3)!%s\n", inplace ? " (inplace)" : ""); gcry_cipher_close (hd); } static void +check (int algo, + const void *kek, size_t keklen, + const void *data, size_t datalen, + const void *expected, size_t expectedlen) +{ + check_one (algo, kek, keklen, data, datalen, expected, expectedlen, 0); + check_one (algo, kek, keklen, data, datalen, expected, expectedlen, 1); +} + + +static void check_all (void) { if (verbose) commit 330ec66e0babdabb658dc7d6db78f37b2a1b996e Author: Stephan Mueller Date: Mon Mar 12 22:24:37 2018 +0100 AES-KW: fix in-place encryption * cipher/cipher-aeswrap.c: move memmove call before KW IV setting -- In case AES-KW in-place encryption is performed, the plaintext must be moved to the correct destination location before the first semiblock of the destination buffer is modified. Without the patch, the first semiblock of the plaintext is overwritten with a6a6a6a6a6a6a6a6. 
Signed-off-by: Stephan Mueller diff --git a/cipher/cipher-aeswrap.c b/cipher/cipher-aeswrap.c index 698742d..a8d0e03 100644 --- a/cipher/cipher-aeswrap.c +++ b/cipher/cipher-aeswrap.c @@ -70,6 +70,9 @@ _gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c, a = outbuf; /* We store A directly in OUTBUF. */ b = c->u_ctr.ctr; /* B is also used to concatenate stuff. */ + /* Copy the inbuf to the outbuf. */ + memmove (r+8, inbuf, inbuflen); + /* If an IV has been set we use that IV as the Alternative Initial Value; if it has not been set we use the standard value. */ if (c->marks.iv) @@ -77,9 +80,6 @@ _gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c, else memset (a, 0xa6, 8); - /* Copy the inbuf to the outbuf. */ - memmove (r+8, inbuf, inbuflen); - memset (t, 0, sizeof t); /* t := 0. */ for (j = 0; j <= 5; j++) ----------------------------------------------------------------------- Summary of changes: cipher/cipher-aeswrap.c | 6 ++-- tests/aeswrap.c | 86 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 20 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From jussi.kivilinna at iki.fi Thu Mar 22 21:29:15 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 22 Mar 2018 22:29:15 +0200 Subject: [PATCH] AES-KW: fix in-place encryption In-Reply-To: <10870564.SVcAGkk7tg@tauon.chronox.de> References: <4386046.jcxHWR5DvT@tauon.chronox.de> <10870564.SVcAGkk7tg@tauon.chronox.de> Message-ID: <89d5b0b0-b7f3-8c2f-34b2-d0cec3e378ab@iki.fi> On 12.03.2018 23:24, Stephan Mueller wrote: > In case AES-KW in-place encryption is performed, the plaintext must be > moved to the correct destination location before the first semiblock of > the destination buffer is modified. Without the patch, the first > semiblock of the plaintext is overwritten with a6a6a6a6a6a6a6a6. > > * cipher/cipher-aeswrap.c: move memmove call before KW IV setting > Thanks, applied and pushed. -Jussi From martin at martin.st Thu Mar 22 22:32:36 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 23:32:36 +0200 Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 Message-ID: <1521754360-5806-1-git-send-email-martin@martin.st> * random/rndw32.c: Change _WIN64 ifdef into __x86_64__. -- This fixes building this file for windows on aarch64. Signed-off-by: Martin Storsjö --- random/rndw32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/random/rndw32.c b/random/rndw32.c index 7e9ac50..08a8867 100644 --- a/random/rndw32.c +++ b/random/rndw32.c @@ -986,7 +986,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, On AMD64, TSC is always available and intrinsic is provided for accessing it.
*/ -#ifdef __WIN64__ +#ifdef __x86_64__ { unsigned __int64 aint64; @@ -1024,7 +1024,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, (*add) (&aword, sizeof (aword), origin ); } } -#endif /*__WIN64__*/ +#endif /*__x86_64__*/ } -- 2.7.4 From martin at martin.st Thu Mar 22 22:32:38 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 23:32:38 +0200 Subject: [PATCH 3/5] aarch64: Fix assembling chacha20-aarch64.S with clang/llvm In-Reply-To: <1521754360-5806-1-git-send-email-martin@martin.st> References: <1521754360-5806-1-git-send-email-martin@martin.st> Message-ID: <1521754360-5806-3-git-send-email-martin@martin.st> * cipher/chacha20-aarch64.S: Remove superfluous lane counts. -- When referring to a specific lane, one doesn't need to specify the total number of lanes of the register. With GNU binutils, both forms are accepted, while clang/llvm rejects the form with the unnecessary number of lanes. Signed-off-by: Martin Storsjö --- cipher/chacha20-aarch64.S | 60 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 739ddde..5990a08 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -170,27 +170,27 @@ _gcry_chacha20_aarch64_blocks4: mov ROUND, #20; ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS]; - dup X12.4s, X15.4s[0]; - dup X13.4s, X15.4s[1]; + dup X12.4s, X15.s[0]; + dup X13.4s, X15.s[1]; ldr CTR, [INPUT_CTR]; add X12.4s, X12.4s, VCTR.4s; - dup X0.4s, VTMP1.4s[0]; - dup X1.4s, VTMP1.4s[1]; - dup X2.4s, VTMP1.4s[2]; - dup X3.4s, VTMP1.4s[3]; - dup X14.4s, X15.4s[2]; + dup X0.4s, VTMP1.s[0]; + dup X1.4s, VTMP1.s[1]; + dup X2.4s, VTMP1.s[2]; + dup X3.4s, VTMP1.s[3]; + dup X14.4s, X15.s[2]; cmhi VTMP0.4s, VCTR.4s, X12.4s; - dup X15.4s, X15.4s[3]; + dup X15.4s, X15.s[3]; add CTR, CTR, #4; /* Update counter */ - dup X4.4s, VTMP2.4s[0]; - dup X5.4s, VTMP2.4s[1]; - dup X6.4s, VTMP2.4s[2]; - dup X7.4s, VTMP2.4s[3]; + dup X4.4s, VTMP2.s[0]; + dup X5.4s, VTMP2.s[1]; + dup X6.4s, VTMP2.s[2]; + dup X7.4s, VTMP2.s[3]; sub X13.4s, X13.4s, VTMP0.4s; - dup X8.4s, VTMP3.4s[0]; - dup X9.4s, VTMP3.4s[1]; - dup X10.4s, VTMP3.4s[2]; - dup X11.4s, VTMP3.4s[3]; + dup X8.4s, VTMP3.s[0]; + dup X9.4s, VTMP3.s[1]; + dup X10.4s, VTMP3.s[2]; + dup X11.4s, VTMP3.s[3]; mov X12_TMP.16b, X12.16b; mov X13_TMP.16b, X13.16b; str CTR, [INPUT_CTR]; @@ -208,19 +208,19 @@ _gcry_chacha20_aarch64_blocks4: PLUS(X12, X12_TMP); /* INPUT + 12 * 4 + counter */ PLUS(X13, X13_TMP); /* INPUT + 13 * 4 + counter */ - dup VTMP2.4s, VTMP0.4s[0]; /* INPUT + 0 * 4 */ - dup VTMP3.4s, VTMP0.4s[1]; /* INPUT + 1 * 4 */ - dup X12_TMP.4s, VTMP0.4s[2]; /* INPUT + 2 * 4 */ - dup X13_TMP.4s, VTMP0.4s[3]; /* INPUT + 3 * 4 */ + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */ PLUS(X0, VTMP2); PLUS(X1, VTMP3); PLUS(X2, X12_TMP); PLUS(X3, X13_TMP); - dup VTMP2.4s, VTMP1.4s[0]; /* INPUT + 4 * 4 */ - dup VTMP3.4s, VTMP1.4s[1]; /* INPUT + 5 * 4 */ - dup X12_TMP.4s, VTMP1.4s[2]; /* INPUT + 6 * 4 */ - dup X13_TMP.4s, VTMP1.4s[3]; /* INPUT + 7 * 4 */ + dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */ + dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */ + dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */ + dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */ ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS]; mov INPUT_POS, INPUT; PLUS(X4, VTMP2); @@ -228,12
+228,12 @@ _gcry_chacha20_aarch64_blocks4: PLUS(X6, X12_TMP); PLUS(X7, X13_TMP); - dup VTMP2.4s, VTMP0.4s[0]; /* INPUT + 8 * 4 */ - dup VTMP3.4s, VTMP0.4s[1]; /* INPUT + 9 * 4 */ - dup X12_TMP.4s, VTMP0.4s[2]; /* INPUT + 10 * 4 */ - dup X13_TMP.4s, VTMP0.4s[3]; /* INPUT + 11 * 4 */ - dup VTMP0.4s, VTMP1.4s[2]; /* INPUT + 14 * 4 */ - dup VTMP1.4s, VTMP1.4s[3]; /* INPUT + 15 * 4 */ + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */ + dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */ + dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */ PLUS(X8, VTMP2); PLUS(X9, VTMP3); PLUS(X10, X12_TMP); -- 2.7.4 From martin at martin.st Thu Mar 22 22:32:40 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 23:32:40 +0200 Subject: [PATCH 5/5] aarch64: Enable building the aarch64 cipher assembly for windows In-Reply-To: <1521754360-5806-1-git-send-email-martin@martin.st> References: <1521754360-5806-1-git-send-email-martin@martin.st> Message-ID: <1521754360-5806-5-git-send-email-martin@martin.st> * cipher/asm-common-aarch64.h: New. * cipher/camellia-aarch64.S: Use ELF macro, use x19 instead of x18. * cipher/chacha20-aarch64.S: Use ELF macro, don't use GOT on windows. * cipher/cipher-gcm-armv8-aarch64-ce.S: Use ELF macro. * cipher/rijndael-aarch64.S: Use ELF macro. * cipher/rijndael-armv8-aarch64-ce.S: Use ELF macro. * cipher/sha1-armv8-aarch64-ce.S: Use ELF macro. * cipher/sha256-armv8-aarch64-ce.S: Use ELF macro. * cipher/twofish-aarch64.S: Use ELF macro. * configure.ac: Don't require .size and .type in aarch64 assembly check. -- Don't require .type and .size in configure; we can make them optional via a preprocessor macro. This is mostly a mechanical change, wrapping the .type and .size directives in an ELF() macro, with two actual manual changes (when targeting windows): - Don't load global symbols via a GOT (in chacha20) - Don't use the x18 register (in camellia); back up and restore x19 in the prologue/epilogue and use that instead. x18 is a platform-specific register; on linux, it's free to be used by user code, while it's reserved for platform use on windows and darwin. Always use x19 instead of x18 for consistency. Signed-off-by: Martin Storsjö --- This isn't strictly necessary for building libgcrypt for windows on aarch64; previously, configure concluded that the assembly can't be built since the .type and .size directives don't work. This just allows using more of the existing assembly routines. This also probably has the effect that the same assembly gets enabled when targeting darwin (iOS), but building with assembly enabled doesn't work for darwin anyway (even prior to this change, since darwin requires an extra leading underscore on all symbols, while the mpi/aarch64 code gets automatically enabled).
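To make the mechanism concrete, here is a minimal sketch of how the wrapped directives expand; the function name is a placeholder for illustration, not taken from the patch:

    #ifdef __ELF__
    # define ELF(...) __VA_ARGS__
    #else
    # define ELF(...) /*_*/
    #endif

    .text
    .globl example_func
    ELF(.type example_func,%function)    /* kept on ELF, dropped elsewhere */
    example_func:
            ret
    ELF(.size example_func,.-example_func)

On an ELF target the preprocessor passes the wrapped lines through unchanged, so GNU as still records the symbol type and size; when assembling for windows (PE/COFF) or darwin (Mach-O), both lines expand to nothing and the assembler never sees the unsupported directives.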
--- cipher/asm-common-aarch64.h | 32 ++++++++++++++++++++ cipher/camellia-aarch64.S | 23 ++++++++------ cipher/chacha20-aarch64.S | 12 ++++++-- cipher/cipher-gcm-armv8-aarch64-ce.S | 10 +++---- cipher/rijndael-aarch64.S | 10 +++---- cipher/rijndael-armv8-aarch64-ce.S | 58 ++++++++++++++++++------------------ cipher/sha1-armv8-aarch64-ce.S | 6 ++-- cipher/sha256-armv8-aarch64-ce.S | 6 ++-- cipher/twofish-aarch64.S | 10 +++---- configure.ac | 4 --- 10 files changed, 105 insertions(+), 66 deletions(-) create mode 100644 cipher/asm-common-aarch64.h diff --git a/cipher/asm-common-aarch64.h b/cipher/asm-common-aarch64.h new file mode 100644 index 0000000..814b7ad --- /dev/null +++ b/cipher/asm-common-aarch64.h @@ -0,0 +1,32 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsjö + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef GCRY_ASM_COMMON_AARCH64_H +#define GCRY_ASM_COMMON_AARCH64_H + +#include <config.h> + +#ifdef __ELF__ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* GCRY_ASM_COMMON_AARCH64_H */ diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 68d2a7d..c3cc463 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -19,7 +19,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -55,12 +55,12 @@ #define RT0 w15 #define RT1 w16 #define RT2 w17 -#define RT3 w18 +#define RT3 w19 #define xRT0 x15 #define xRT1 x16 #define xRT2 x17 -#define xRT3 x18 +#define xRT3 x19 #ifdef __AARCH64EL__ #define host_to_be(reg, rtmp) \ @@ -198,9 +198,10 @@ str_output_be(RDST, YL, YR, XL, XR, RT0, RT1); .globl _gcry_camellia_arm_encrypt_block -.type _gcry_camellia_arm_encrypt_block,@function; +ELF(.type _gcry_camellia_arm_encrypt_block,@function;) _gcry_camellia_arm_encrypt_block: + stp x19, x30, [sp, #-16]!
/* input: * x0: keytable * x1: dst @@ -227,6 +228,7 @@ _gcry_camellia_arm_encrypt_block: outunpack(24); + ldp x19, x30, [sp], #16 ret; .ltorg @@ -236,14 +238,16 @@ _gcry_camellia_arm_encrypt_block: outunpack(32); + ldp x19, x30, [sp], #16 ret; .ltorg -.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block; +ELF(.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;) .globl _gcry_camellia_arm_decrypt_block -.type _gcry_camellia_arm_decrypt_block,@function; +ELF(.type _gcry_camellia_arm_decrypt_block,@function;) _gcry_camellia_arm_decrypt_block: + stp x19, x30, [sp, #-16]! /* input: * x0: keytable * x1: dst @@ -271,6 +275,7 @@ _gcry_camellia_arm_decrypt_block: outunpack(0); + ldp x19, x30, [sp], #16 ret; .ltorg @@ -281,11 +286,11 @@ _gcry_camellia_arm_decrypt_block: b .Ldec_128; .ltorg -.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block; +ELF(.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;) /* Encryption/Decryption tables */ .globl _gcry_camellia_arm_tables -.type _gcry_camellia_arm_tables,@object; +ELF(.type _gcry_camellia_arm_tables,@object;) .balign 32 _gcry_camellia_arm_tables: .Lcamellia_sp1110: @@ -551,7 +556,7 @@ _gcry_camellia_arm_tables: .long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4 .long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7 .long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e -.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables; +ELF(.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 5990a08..3844d4e 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -27,7 +27,7 @@ * Public domain. */ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ @@ -38,9 +38,15 @@ .text +#ifdef _WIN32 +#define GET_DATA_POINTER(reg, name) \ + adrp reg, name ; \ + add reg, reg, #:lo12:name ; +#else #define GET_DATA_POINTER(reg, name) \ adrp reg, :got:name ; \ ldr reg, [reg, #:got_lo12:name] ; +#endif /* register macros */ #define INPUT x0 @@ -148,7 +154,7 @@ chacha20_data: .align 3 .globl _gcry_chacha20_aarch64_blocks4 -.type _gcry_chacha20_aarch64_blocks4,%function; +ELF(.type _gcry_chacha20_aarch64_blocks4,%function;) _gcry_chacha20_aarch64_blocks4: /* input: @@ -303,6 +309,6 @@ _gcry_chacha20_aarch64_blocks4: eor x0, x0, x0 ret -.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4; +ELF(.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4;) #endif diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S index 0cfaf1c..b6c4f59 100644 --- a/cipher/cipher-gcm-armv8-aarch64-ce.S +++ b/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -174,7 +174,7 @@ gcry_gcm_reduction_constant: */ .align 3 .globl _gcry_ghash_armv8_ce_pmull -.type _gcry_ghash_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_armv8_ce_pmull,%function;) _gcry_ghash_armv8_ce_pmull: /* input: * x0: gcm_key @@ -360,7 +360,7 @@ _gcry_ghash_armv8_ce_pmull: .Ldo_nothing: mov x0, #0 ret -.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull; +ELF(.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull;) /* @@ -368,7 +368,7 @@ _gcry_ghash_armv8_ce_pmull: */ .align 3 .globl _gcry_ghash_setup_armv8_ce_pmull -.type _gcry_ghash_setup_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_setup_armv8_ce_pmull,%function;) _gcry_ghash_setup_armv8_ce_pmull: /* input: * x0: gcm_key @@ -408,6 +408,6 @@ _gcry_ghash_setup_armv8_ce_pmull: st1 {rh5.16b-rh6.16b}, [x1] ret -.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull; +ELF(.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull;) #endif diff --git a/cipher/rijndael-aarch64.S b/cipher/rijndael-aarch64.S index e533bbe..aad7487 100644 --- a/cipher/rijndael-aarch64.S +++ b/cipher/rijndael-aarch64.S @@ -18,7 +18,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -206,7 +206,7 @@ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_encrypt_block -.type _gcry_aes_arm_encrypt_block,%function; +ELF(.type _gcry_aes_arm_encrypt_block,%function;) _gcry_aes_arm_encrypt_block: /* input: @@ -285,7 +285,7 @@ _gcry_aes_arm_encrypt_block: lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD); b .Lenc_done; -.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block; +ELF(.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;) #define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \ @@ -429,7 +429,7 @@ _gcry_aes_arm_encrypt_block: addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_decrypt_block -.type _gcry_aes_arm_decrypt_block,%function; +ELF(.type _gcry_aes_arm_decrypt_block,%function;) _gcry_aes_arm_decrypt_block: /* input: @@ -504,7 +504,7 @@ _gcry_aes_arm_decrypt_block: decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); b .Ldec_tail; -.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block; +ELF(.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__ */ diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S index 40097a7..5859557 100644 --- a/cipher/rijndael-armv8-aarch64-ce.S +++ b/cipher/rijndael-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -239,7 +239,7 @@ */ .align 3 .globl _gcry_aes_enc_armv8_ce -.type _gcry_aes_enc_armv8_ce,%function; +ELF(.type _gcry_aes_enc_armv8_ce,%function;) _gcry_aes_enc_armv8_ce: /* input: * x0: keysched @@ -291,7 +291,7 @@ _gcry_aes_enc_armv8_ce: CLEAR_REG(vk13) CLEAR_REG(vk14) b .Lenc1_tail -.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce; +ELF(.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce;) /* @@ -301,7 +301,7 @@ _gcry_aes_enc_armv8_ce: */ .align 3 .globl _gcry_aes_dec_armv8_ce -.type _gcry_aes_dec_armv8_ce,%function; +ELF(.type _gcry_aes_dec_armv8_ce,%function;) _gcry_aes_dec_armv8_ce: /* input: * x0: keysched @@ -353,7 +353,7 @@ _gcry_aes_dec_armv8_ce: CLEAR_REG(vk13) CLEAR_REG(vk14) b .Ldec1_tail -.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce; +ELF(.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce;) /* @@ -366,7 +366,7 @@ _gcry_aes_dec_armv8_ce: .align 3 .globl _gcry_aes_cbc_enc_armv8_ce -.type _gcry_aes_cbc_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_enc_armv8_ce,%function;) _gcry_aes_cbc_enc_armv8_ce: /* input: * x0: keysched @@ -419,7 +419,7 @@ _gcry_aes_cbc_enc_armv8_ce: .Lcbc_enc_skip: ret -.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce; +ELF(.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce;) /* * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched, @@ -430,7 +430,7 @@ _gcry_aes_cbc_enc_armv8_ce: .align 3 .globl _gcry_aes_cbc_dec_armv8_ce -.type _gcry_aes_cbc_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_dec_armv8_ce,%function;) _gcry_aes_cbc_dec_armv8_ce: /* input: * x0: keysched @@ -515,7 +515,7 @@ _gcry_aes_cbc_dec_armv8_ce: .Lcbc_dec_skip: ret -.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce; +ELF(.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce;) /* @@ -527,7 +527,7 @@ _gcry_aes_cbc_dec_armv8_ce: .align 3 .globl _gcry_aes_ctr_enc_armv8_ce -.type _gcry_aes_ctr_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ctr_enc_armv8_ce,%function;) _gcry_aes_ctr_enc_armv8_ce: /* input: * r0: keysched @@ -669,7 +669,7 @@ _gcry_aes_ctr_enc_armv8_ce: .Lctr_enc_skip: ret -.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce; +ELF(.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce;) /* @@ -681,7 +681,7 @@ _gcry_aes_ctr_enc_armv8_ce: .align 3 .globl _gcry_aes_cfb_enc_armv8_ce -.type _gcry_aes_cfb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_enc_armv8_ce,%function;) _gcry_aes_cfb_enc_armv8_ce: /* input: * r0: keysched @@ -732,7 +732,7 @@ _gcry_aes_cfb_enc_armv8_ce: .Lcfb_enc_skip: ret -.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce; +ELF(.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce;) /* @@ -744,7 +744,7 @@ _gcry_aes_cfb_enc_armv8_ce: .align 3 .globl _gcry_aes_cfb_dec_armv8_ce -.type _gcry_aes_cfb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_dec_armv8_ce,%function;) _gcry_aes_cfb_dec_armv8_ce: /* input: * r0: keysched @@ -829,7 +829,7 @@ _gcry_aes_cfb_dec_armv8_ce: .Lcfb_dec_skip: ret -.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce; +ELF(.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce;) /* @@ -846,7 +846,7 @@ _gcry_aes_cfb_dec_armv8_ce: .align 3 .globl _gcry_aes_ocb_enc_armv8_ce -.type _gcry_aes_ocb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_enc_armv8_ce,%function;) _gcry_aes_ocb_enc_armv8_ce: /* input: * x0: keysched @@ -979,7 +979,7 @@ _gcry_aes_ocb_enc_armv8_ce: CLEAR_REG(v16) ret -.size
_gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce; +ELF(.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce;) /* @@ -996,7 +996,7 @@ _gcry_aes_ocb_enc_armv8_ce: .align 3 .globl _gcry_aes_ocb_dec_armv8_ce -.type _gcry_aes_ocb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_dec_armv8_ce,%function;) _gcry_aes_ocb_dec_armv8_ce: /* input: * x0: keysched @@ -1129,7 +1129,7 @@ _gcry_aes_ocb_dec_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce; +ELF(.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce;) /* @@ -1145,7 +1145,7 @@ _gcry_aes_ocb_dec_armv8_ce: .align 3 .globl _gcry_aes_ocb_auth_armv8_ce -.type _gcry_aes_ocb_auth_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_auth_armv8_ce,%function;) _gcry_aes_ocb_auth_armv8_ce: /* input: * x0: keysched @@ -1273,7 +1273,7 @@ _gcry_aes_ocb_auth_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce; +ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;) /* @@ -1285,7 +1285,7 @@ _gcry_aes_ocb_auth_armv8_ce: .align 3 .globl _gcry_aes_xts_enc_armv8_ce -.type _gcry_aes_xts_enc_armv8_ce,%function; +ELF(.type _gcry_aes_xts_enc_armv8_ce,%function;) _gcry_aes_xts_enc_armv8_ce: /* input: * r0: keysched @@ -1410,7 +1410,7 @@ _gcry_aes_xts_enc_armv8_ce: .Lxts_enc_skip: ret -.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce; +ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;) /* @@ -1422,7 +1422,7 @@ _gcry_aes_xts_enc_armv8_ce: .align 3 .globl _gcry_aes_xts_dec_armv8_ce -.type _gcry_aes_xts_dec_armv8_ce,%function; +ELF(.type _gcry_aes_xts_dec_armv8_ce,%function;) _gcry_aes_xts_dec_armv8_ce: /* input: * r0: keysched @@ -1547,7 +1547,7 @@ _gcry_aes_xts_dec_armv8_ce: .Lxts_dec_skip: ret -.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce; +ELF(.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;) /* @@ -1555,7 +1555,7 @@ _gcry_aes_xts_dec_armv8_ce: */ .align 3 .globl _gcry_aes_sbox4_armv8_ce -.type _gcry_aes_sbox4_armv8_ce,%function; +ELF(.type _gcry_aes_sbox4_armv8_ce,%function;) _gcry_aes_sbox4_armv8_ce: /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in * Cryptology – CT-RSA 2015" for details. @@ -1568,7 +1568,7 @@ _gcry_aes_sbox4_armv8_ce: mov w0, v0.S[0] CLEAR_REG(v0) ret -.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce; +ELF(.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;) /* @@ -1576,13 +1576,13 @@ _gcry_aes_sbox4_armv8_ce: */ .align 3 .globl _gcry_aes_invmixcol_armv8_ce -.type _gcry_aes_invmixcol_armv8_ce,%function; +ELF(.type _gcry_aes_invmixcol_armv8_ce,%function;) _gcry_aes_invmixcol_armv8_ce: ld1 {v0.16b}, [x1] aesimc v0.16b, v0.16b st1 {v0.16b}, [x0] CLEAR_REG(v0) ret -.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce; +ELF(.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;) #endif diff --git a/cipher/sha1-armv8-aarch64-ce.S b/cipher/sha1-armv8-aarch64-ce.S index ec1810d..aeb67a1 100644 --- a/cipher/sha1-armv8-aarch64-ce.S +++ b/cipher/sha1-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -103,7 +103,7 @@ gcry_sha1_aarch64_ce_K_VEC: */ .align 3 .globl _gcry_sha1_transform_armv8_ce -.type _gcry_sha1_transform_armv8_ce,%function; +ELF(.type _gcry_sha1_transform_armv8_ce,%function;) _gcry_sha1_transform_armv8_ce: /* input: * x0: ctx, CTX @@ -199,6 +199,6 @@ _gcry_sha1_transform_armv8_ce: .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce; +ELF(.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce;) #endif diff --git a/cipher/sha256-armv8-aarch64-ce.S b/cipher/sha256-armv8-aarch64-ce.S index a4575da..6b3ad32 100644 --- a/cipher/sha256-armv8-aarch64-ce.S +++ b/cipher/sha256-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -113,7 +113,7 @@ gcry_sha256_aarch64_ce_K: */ .align 3 .globl _gcry_sha256_transform_armv8_ce -.type _gcry_sha256_transform_armv8_ce,%function; +ELF(.type _gcry_sha256_transform_armv8_ce,%function;) _gcry_sha256_transform_armv8_ce: /* input: * r0: ctx, CTX @@ -213,6 +213,6 @@ _gcry_sha256_transform_armv8_ce: .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce; +ELF(.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce;) #endif diff --git a/cipher/twofish-aarch64.S b/cipher/twofish-aarch64.S index 99c4675..adee412 100644 --- a/cipher/twofish-aarch64.S +++ b/cipher/twofish-aarch64.S @@ -18,7 +18,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -217,7 +217,7 @@ ror1(RD); .globl _gcry_twofish_arm_encrypt_block -.type _gcry_twofish_arm_encrypt_block,%function; +ELF(.type _gcry_twofish_arm_encrypt_block,%function;) _gcry_twofish_arm_encrypt_block: /* input: @@ -263,10 +263,10 @@ _gcry_twofish_arm_encrypt_block: ret; .ltorg -.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block; +ELF(.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;) .globl _gcry_twofish_arm_decrypt_block -.type _gcry_twofish_arm_decrypt_block,%function; +ELF(.type _gcry_twofish_arm_decrypt_block,%function;) _gcry_twofish_arm_decrypt_block: /* input: @@ -311,7 +311,7 @@ _gcry_twofish_arm_decrypt_block: str_output_le(RDST, RA, RB, RC, RD, RT0, RT1); ret; -.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block; +ELF(.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/configure.ac b/configure.ac index b5d7211..330485f 100644 --- a/configure.ac +++ b/configure.ac @@ -1119,10 +1119,6 @@ AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly i "eor x0, x0, x30, ror #12;\n\t" "add x0, x0, x30, asr #12;\n\t" "eor v0.16b, v0.16b, v31.16b;\n\t" - - /* Test if '.type' and '.size' are supported. 
*/ - ".size asmfunc,.-asmfunc;\n\t" - ".type asmfunc, at function;\n\t" );]])], [gcry_cv_gcc_aarch64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then -- 2.7.4 From martin at martin.st Thu Mar 22 22:32:39 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 23:32:39 +0200 Subject: [PATCH 4/5] aarch64: camellia: Only use the lower 32 bit of an int parameter In-Reply-To: <1521754360-5806-1-git-send-email-martin@martin.st> References: <1521754360-5806-1-git-send-email-martin@martin.st> Message-ID: <1521754360-5806-4-git-send-email-martin@martin.st> * cipher/camellia-aarch64.S: Use 'w3' instead of 'x3'. -- The keybits parameter is declared as int, and in those cases, the upper half of a register is undefined, not guaranteed to be zero. Signed-off-by: Martin Storsj? --- cipher/camellia-aarch64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 440f69f..68d2a7d 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -33,7 +33,7 @@ #define CTX x0 #define RDST x1 #define RSRC x2 -#define RKEYBITS x3 +#define RKEYBITS w3 #define RTAB1 x4 #define RTAB2 x5 -- 2.7.4 From martin at martin.st Thu Mar 22 22:32:37 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 23:32:37 +0200 Subject: [PATCH 2/5] aarch64: mpi: Fix building the mpi aarch64 assembly for windows In-Reply-To: <1521754360-5806-1-git-send-email-martin@martin.st> References: <1521754360-5806-1-git-send-email-martin@martin.st> Message-ID: <1521754360-5806-2-git-send-email-martin@martin.st> * mpi/aarch64/mpih-add1.S: Use ELF macro. * mpi/aarch64/mpih-mul1.S: Use ELF macro. * mpi/aarch64/mpih-mul2.S: Use ELF macro. * mpi/aarch64/mpih-mul3.S: Use ELF macro. * mpi/aarch64/mpih-sub1.S: Use ELF macro. * mpi/asm-common-aarch64.h: New. -- The mpi aarch64 assembly is enabled as soon as the compiler supports inline assembly, without checking for .type and .size, as is done for the rest of the assembly in cipher/*.S. (The .type and .size directives are only supported on ELF.) Signed-off-by: Martin Storsj? 
--- mpi/aarch64/mpih-add1.S | 5 +++-- mpi/aarch64/mpih-mul1.S | 5 +++-- mpi/aarch64/mpih-mul2.S | 5 +++-- mpi/aarch64/mpih-mul3.S | 5 +++-- mpi/aarch64/mpih-sub1.S | 5 +++-- mpi/asm-common-aarch64.h | 30 ++++++++++++++++++++++++++++++ 6 files changed, 45 insertions(+), 10 deletions(-) create mode 100644 mpi/asm-common-aarch64.h diff --git a/mpi/aarch64/mpih-add1.S b/mpi/aarch64/mpih-add1.S index fa8cd01..4ead1c2 100644 --- a/mpi/aarch64/mpih-add1.S +++ b/mpi/aarch64/mpih-add1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_add_n -.type _gcry_mpih_add_n,%function +ELF(.type _gcry_mpih_add_n,%function) _gcry_mpih_add_n: and x5, x3, #3; adds xzr, xzr, xzr; /* clear carry flag */ @@ -68,4 +69,4 @@ _gcry_mpih_add_n: .Lend: adc x0, xzr, xzr; ret; -.size _gcry_mpih_add_n,.-_gcry_mpih_add_n; +ELF(.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;) diff --git a/mpi/aarch64/mpih-mul1.S b/mpi/aarch64/mpih-mul1.S index 65e98fe..8a86269 100644 --- a/mpi/aarch64/mpih-mul1.S +++ b/mpi/aarch64/mpih-mul1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_mul_1 -.type _gcry_mpih_mul_1,%function +ELF(.type _gcry_mpih_mul_1,%function) _gcry_mpih_mul_1: and x5, x2, #3; mov x4, xzr; @@ -93,4 +94,4 @@ _gcry_mpih_mul_1: .Lend: mov x0, x4; ret; -.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1; +ELF(.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;) diff --git a/mpi/aarch64/mpih-mul2.S b/mpi/aarch64/mpih-mul2.S index bd3b2c9..c7c08e5 100644 --- a/mpi/aarch64/mpih-mul2.S +++ b/mpi/aarch64/mpih-mul2.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_addmul_1 -.type _gcry_mpih_addmul_1,%function +ELF(.type _gcry_mpih_addmul_1,%function) _gcry_mpih_addmul_1: and x5, x2, #3; mov x6, xzr; @@ -105,4 +106,4 @@ _gcry_mpih_addmul_1: .Lend: mov x0, x6; ret; -.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1; +ELF(.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;) diff --git a/mpi/aarch64/mpih-mul3.S b/mpi/aarch64/mpih-mul3.S index a58bc53..ccc961e 100644 --- a/mpi/aarch64/mpih-mul3.S +++ b/mpi/aarch64/mpih-mul3.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_submul_1 -.type _gcry_mpih_submul_1,%function +ELF(.type _gcry_mpih_submul_1,%function) _gcry_mpih_submul_1: and x5, x2, #3; mov x7, xzr; @@ -118,4 +119,4 @@ _gcry_mpih_submul_1: .Loop_end: cinc x0, x7, cc; ret; -.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1; +ELF(.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;) diff --git a/mpi/aarch64/mpih-sub1.S b/mpi/aarch64/mpih-sub1.S index cbf2f08..4a66373 100644 --- a/mpi/aarch64/mpih-sub1.S +++ b/mpi/aarch64/mpih-sub1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_sub_n -.type _gcry_mpih_sub_n,%function +ELF(.type _gcry_mpih_sub_n,%function) _gcry_mpih_sub_n: and x5, x3, #3; subs xzr, xzr, xzr; /* prepare carry flag for sub */ @@ -68,4 +69,4 @@ _gcry_mpih_sub_n: .Lend: cset x0, cc; ret; -.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n; +ELF(.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;) diff --git a/mpi/asm-common-aarch64.h b/mpi/asm-common-aarch64.h 
new file mode 100644 index 0000000..1269413 --- /dev/null +++ b/mpi/asm-common-aarch64.h @@ -0,0 +1,30 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsj? + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef MPI_ASM_COMMON_AARCH64_H +#define MPI_ASM_COMMON_AARCH64_H + +#ifdef __ELF__ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* MPI_ASM_COMMON_AARCH64_H */ -- 2.7.4 From martin at martin.st Thu Mar 22 21:54:30 2018 From: martin at martin.st (=?ISO-8859-15?Q?Martin_Storsj=F6?=) Date: Thu, 22 Mar 2018 22:54:30 +0200 (EET) Subject: DCO Message-ID: Libgcrypt Developer's Certificate of Origin. Version 1.0 ========================================================= By making a contribution to the Libgcrypt project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the free software license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate free software license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same free software license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the free software license(s) involved. Signed-off-by: Martin Storsj? -------------- next part -------------- Libgcrypt Developer's Certificate of Origin. Version 1.0 ========================================================= By making a contribution to the Libgcrypt project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the free software license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate free software license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same free software license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. 
(d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the free software license(s) involved. Signed-off-by: Martin Storsjö -------------- next part -------------- A non-text attachment was scrubbed... Name: DCO.sig Type: application/pgp-signature Size: 566 bytes Desc: URL: From martin at martin.st Thu Mar 22 22:05:51 2018 From: martin at martin.st (=?ISO-8859-15?Q?Martin_Storsj=F6?=) Date: Thu, 22 Mar 2018 23:05:51 +0200 (EET) Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: Moikka, On Thu, 22 Mar 2018, Jussi Kivilinna wrote: > On 22.03.2018 10:56, Martin Storsjö wrote: >> This fixes building this file for windows on aarch64. >> >> Signed-off-by: Martin Storsjö > > Thanks for the patches. There are a few generic things that need to be done > before these can be applied: > - Check signing off DCO part in doc/HACKING Thanks - this should be done now, hopefully the signatures should be fine even though I don't have a really proper mail+PGP setup. > - Add ChangeLog entries to beginning of commit log, see existing commit > messages for example. Ok, will try to - I'll resend patches with this taken care of. > I'm interested in testing these patches myself. Can you give some pointers > for how to cross-compile libgcrypt for windows/aarch64? Absolutely. First off, you need llvm-mingw: https://github.com/mstorsjo/llvm-mingw The first step in this consists of building a recent pinned version of LLVM+clang, which takes a nontrivial amount of time (a bit over 30 min on an average machine), but I think a prebuilt version of the recent 6.0.0 release [1] should also be fine. (I can't think of any changes since the 6.0 branch that would be relevant for gcrypt.) If you use a prebuilt clang, you can skip the build-llvm.sh step. After building that, you should have a set of {i686,x86_64,armv7,aarch64}-w64-mingw32- tools just like in normal mingw setups. For these patches, I'm configuring with --host=aarch64-w64-mingw32 and nothing else. In case you're interested in testing building in armv7 mode, you'll run into similar issues as for aarch64 (the ELF-specific .type/.size aren't supported etc), but the build for that target succeeded out of the box so there wasn't quite as much need for patches there. One main caveat to know about windows on armv7 is that it's thumb-only, which can be an issue if there's assembly that isn't ready to be assembled in that mode. If you actually want to try running binaries as well, you can actually get surprisingly far with running things in wine on aarch64 linux. For best results, you need a wine built with a recent clang (>= clang 5.0) with a patch [2] that isn't yet merged in upstream wine. // Martin [1] http://releases.llvm.org/download.html#6.0.0 [2] https://source.winehq.org/patches/data/136484 From jussi.kivilinna at iki.fi Sun Mar 25 16:30:16 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 25 Mar 2018 17:30:16 +0300 Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: Moi, On 22.03.2018 23:05, Martin Storsjö wrote: > Moikka, > > On Thu, 22 Mar 2018, Jussi Kivilinna wrote: > >> On 22.03.2018 10:56, Martin Storsjö
wrote: >>> This fixes building this file for windows on aarch64. >>> >>> Signed-off-by: Martin Storsjö >> >> Thanks for the patches. There are a few generic things that need to be done >> before these can be applied: >> - Check signing off DCO part in doc/HACKING > > Thanks - this should be done now, hopefully the signatures should be fine even though I don't have a really proper mail+PGP setup. > I'd also need to verify the signed DCO, so you would need to export your public key (maybe by sending it to a key-server). >> - Add ChangeLog entries to beginning of commit log, see existing commit >> messages for example. > > Ok, will try to - I'll resend patches with this taken care of. > >> I'm interested in testing these patches myself. Can you give some pointers >> for how to cross-compile libgcrypt for windows/aarch64? > > Absolutely. First off, you need llvm-mingw: https://github.com/mstorsjo/llvm-mingw > > The first step in this consists of building a recent pinned version of LLVM+clang, which takes a nontrivial amount of time (a bit over 30 min on an average machine), but I think a prebuilt version of the recent 6.0.0 release [1] should also be fine. (I can't think of any changes since the 6.0 branch that would be relevant for gcrypt.) If you use a prebuilt clang, you can skip the build-llvm.sh step. > > After building that, you should have a set of {i686,x86_64,armv7,aarch64}-w64-mingw32- tools just like in normal mingw setups. > > For these patches, I'm configuring with --host=aarch64-w64-mingw32 and nothing else. > > In case you're interested in testing building in armv7 mode, you'll run into similar issues as for aarch64 (the ELF-specific .type/.size aren't supported etc), but the build for that target succeeded out of the box so there wasn't quite as much need for patches there. One main caveat to know about windows on armv7 is that it's thumb-only, which can be an issue if there's assembly that isn't ready to be assembled in that mode. > > If you actually want to try running binaries as well, you can actually get surprisingly far with running things in wine on aarch64 linux. For best results, you need a wine built with a recent clang (>= clang 5.0) with a patch [2] that isn't yet merged in upstream wine. Thanks, I managed to build clang/mingw tools and libgcrypt with them. However, I did not have as much luck with wine. Current git upstream builds fine, and I managed to manually apply patch [2]. Yet any attempt to run an aarch64/win64/PE executable on native aarch64 yields some wine assert failure prints and wine refuses to start up :( Even less luck on qemu-aarch64 chroot over x86_64... wine-aarch64 just segfaults. -Jussi > > // Martin > > [1] http://releases.llvm.org/download.html#6.0.0 > [2] https://source.winehq.org/patches/data/136484 > > From martin at martin.st Sun Mar 25 21:13:24 2018 From: martin at martin.st (=?ISO-8859-15?Q?Martin_Storsj=F6?=) Date: Sun, 25 Mar 2018 22:13:24 +0300 (EEST) Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: Hei, On Sun, 25 Mar 2018, Jussi Kivilinna wrote: > Moi, > > On 22.03.2018 23:05, Martin Storsjö wrote: >> Moikka, >> >> On Thu, 22 Mar 2018, Jussi Kivilinna wrote: >> >>> On 22.03.2018 10:56, Martin Storsjö wrote: >>>> This fixes building this file for windows on aarch64. >>>> >>>> Signed-off-by: Martin Storsjö >>> >>> Thanks for the patches.
There are a few generic things that need to be done >>> before these can be applied: >>> - Check signing off DCO part in doc/HACKING >> >> Thanks - this should be done now, hopefully the signatures should be fine even though I don't have a really proper mail+PGP setup. >> > > I'd also need to verify the signed DCO, so you would need to export > your public key (maybe by sending it to a key-server). Ah, right - that should be done now, and I'm finding it on a few public search interfaces on the web. > Thanks, I managed to build clang/mingw tools and libgcrypt with them. Ok, that's great. > However, I did not have as much luck with wine. Current git upstream > builds fine, and I managed to manually apply patch [2]. Yet any attempt > to run an aarch64/win64/PE executable on native aarch64 yields some wine > assert failure prints and wine refuses to start up :( Even less luck on > qemu-aarch64 chroot over x86_64... wine-aarch64 just segfaults. Hmm, that doesn't sound too promising. Does it work on its own if you run e.g. "wine64 cmd", which doesn't load external binaries? I have a few other patches locally as well that haven't been merged upstream, which are probably also needed: - https://source.winehq.org/patches/data/135165 - https://source.winehq.org/patches/data/137759 These relate to the use of the x18 register. To have things working really properly, you'd need to use a linux distribution that is built completely with the -ffixed-x18 flag, to make the compiler avoid touching the x18 register even though it's normally allowed on linux. Since you probably don't have that (and neither do I), the first of the two patches adds this flag while building wine (which in itself only helps as long as wine doesn't call out to other libraries like glibc), and the second one tries to restore x18 on calls to public functions, in case e.g. glibc actually had clobbered it. That works for calls to public functions, but not e.g. if wine does a callback to user code, or for COM interfaces. These aren't enough to guarantee that things will work though (only a full distribution built with -ffixed-x18 would do), so they aren't merged upstream. Despite that, for me they are enough to have a lot of code working. (Code built with MSVC will use x18 a lot, code built with mingw libraries and compiler-rt uses it a bit less.) For your case, even though the external binaries themselves might not rely on x18 all that much, wine and glibc themselves might, and might be broken by the few places where wine unconditionally sets the register. Hopefully these two patches fix the issue you are running into. // Martin From jussi.kivilinna at iki.fi Sun Mar 25 22:17:49 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 25 Mar 2018 23:17:49 +0300 Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: <1a026668-f30a-578c-06a0-fefc260e9e06@iki.fi> On 25.03.2018 22:13, Martin Storsjö wrote: > Hei, > > On Sun, 25 Mar 2018, Jussi Kivilinna wrote: > >> Moi, >> >> On 22.03.2018 23:05, Martin Storsjö wrote: >>> Moikka, >>> >>> On Thu, 22 Mar 2018, Jussi Kivilinna wrote: >>> >>>> On 22.03.2018 10:56, Martin Storsjö wrote: >>>>> This fixes building this file for windows on aarch64. >>>>> >>>>> Signed-off-by: Martin Storsjö >>>> >>>> Thanks for the patches.
There are a few generic things that need to be done >>>> before these can be applied: >>>> - Check signing off DCO part in doc/HACKING >>> >>> Thanks - this should be done now, hopefully the signatures should be fine even though I don't have a really proper mail+PGP setup. >>> >> >> I'd also need to verify the signed DCO, so you would need to export >> your public key (maybe by sending it to a key-server). > > Ah, right - that should be done now, and I'm finding it on a few public search interfaces on the web. > It appears that somewhere along the way the newlines on your DCO changed from Unix format to DOS, and verifying DCO+DCO.sig fails: $ gpg --verify DCO.sig DCO gpg: Signature made to 22. maaliskuuta 2018 22.53.22 EET gpg: using RSA key 0x2F9B2688742ACF25 gpg: BAD signature from "Martin Storsjö " [unknown] When the newlines are converted back to Unix format, the signature matches: $ dos2unix < DCO | gpg --verify DCO.sig - gpg: Signature made to 22. maaliskuuta 2018 22.53.22 EET gpg: using RSA key 0x2F9B2688742ACF25 gpg: Good signature from "Martin Storsjö " [unknown] Which is good enough for me, but what do others think? Werner? -Jussi From martin at martin.st Sun Mar 25 22:33:16 2018 From: martin at martin.st (=?ISO-8859-15?Q?Martin_Storsj=F6?=) Date: Sun, 25 Mar 2018 23:33:16 +0300 (EEST) Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: <1a026668-f30a-578c-06a0-fefc260e9e06@iki.fi> References: <1521708975-30902-1-git-send-email-martin@martin.st> <1a026668-f30a-578c-06a0-fefc260e9e06@iki.fi> Message-ID: On Sun, 25 Mar 2018, Jussi Kivilinna wrote: > On 25.03.2018 22:13, Martin Storsjö wrote: >> Hei, >> >> On Sun, 25 Mar 2018, Jussi Kivilinna wrote: >> >>> Moi, >>> >>> On 22.03.2018 23:05, Martin Storsjö wrote: >>>> Moikka, >>>> >>>> On Thu, 22 Mar 2018, Jussi Kivilinna wrote: >>>> >>>>> On 22.03.2018 10:56, Martin Storsjö wrote: >>>>>> This fixes building this file for windows on aarch64. >>>>>> >>>>>> Signed-off-by: Martin Storsjö >>>>> >>>>> Thanks for the patches. There are a few generic things that need to be done >>>>> before these can be applied: >>>>> - Check signing off DCO part in doc/HACKING >>>> >>>> Thanks - this should be done now, hopefully the signatures should be fine even though I don't have a really proper mail+PGP setup. >>>> >>> >>> I'd also need to verify the signed DCO, so you would need to export >>> your public key (maybe by sending it to a key-server). >> >> Ah, right - that should be done now, and I'm finding it on a few public search interfaces on the web. >> > It appears that somewhere along the way the newlines on your DCO changed from Unix format to DOS, and verifying DCO+DCO.sig fails: > $ gpg --verify DCO.sig DCO > gpg: Signature made to 22. maaliskuuta 2018 22.53.22 EET > gpg: using RSA key 0x2F9B2688742ACF25 > gpg: BAD signature from "Martin Storsjö " [unknown] > > When the newlines are converted back to Unix format, the signature matches: > > $ dos2unix < DCO | gpg --verify DCO.sig - > gpg: Signature made to 22. maaliskuuta 2018 22.53.22 EET > gpg: using RSA key 0x2F9B2688742ACF25 > gpg: Good signature from "Martin Storsjö " [unknown] > > Which is good enough for me, but what do others think? Werner? Would it make things better to resend it with the signed attachment originally with DOS newlines (or fixing whatever changed them and resending), or does that make my mess even bigger?
// Martin From martin at martin.st Tue Mar 27 22:49:57 2018 From: martin at martin.st (=?UTF-8?Q?Martin_Storsj=c3=b6?=) Date: Tue, 27 Mar 2018 23:49:57 +0300 Subject: DCO (try 2) Message-ID: Libgcrypt Developer's Certificate of Origin. Version 1.0 ========================================================= By making a contribution to the Libgcrypt project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the free software license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate free software license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same free software license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the free software license(s) involved. Signed-off-by: Martin Storsj? From martin at martin.st Wed Mar 28 13:54:23 2018 From: martin at martin.st (=?UTF-8?Q?Martin_Storsj=c3=b6?=) Date: Wed, 28 Mar 2018 14:54:23 +0300 Subject: DCO (try 3) Message-ID: Libgcrypt Developer's Certificate of Origin. Version 1.0 ========================================================= By making a contribution to the Libgcrypt project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the free software license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate free software license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same free software license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the free software license(s) involved. Signed-off-by: Martin Storsj? -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 833 bytes Desc: OpenPGP digital signature URL: From jussi.kivilinna at iki.fi Wed Mar 28 18:18:42 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Wed, 28 Mar 2018 19:18:42 +0300 Subject: DCO (try 3) In-Reply-To: References: Message-ID: <4ab5ed5d-da37-a175-4ccd-79918b6d4f41@iki.fi> On 28.03.2018 14:54, Martin Storsj? wrote: > Libgcrypt Developer's Certificate of Origin. Version 1.0 > ========================================================= > Thanks. -Jussi -------------- next part -------------- A non-text attachment was scrubbed... 
Name: signature.asc Type: application/pgp-signature Size: 638 bytes Desc: OpenPGP digital signature URL: From jussi.kivilinna at iki.fi Wed Mar 28 19:53:17 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Wed, 28 Mar 2018 20:53:17 +0300 Subject: [PATCH 1/2] poly1305: silence compiler warning on clang/aarch64 Message-ID: <152225959773.17658.6096975446990782871.stgit@localhost.localdomain> * cipher/poly1305.c (MUL_MOD_1305_64): cast zero constant to 64-bits. -- This patch fixes "value size does not match register size specified by the constraint and modifier [-Wasm-operand-widths]" warnings when building with clang/aarch64. Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/cipher/poly1305.c b/cipher/poly1305.c index 68d9b9015..571f82862 100644 --- a/cipher/poly1305.c +++ b/cipher/poly1305.c @@ -130,7 +130,7 @@ static void poly1305_init (poly1305_context_t *ctx, /* carry propagation */ \ H2 = H0 & 3; \ H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \ - ADD_1305_64(H2, H1, H0, 0, x0_hi, x0_lo); \ + ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \ } while (0) unsigned int From jussi.kivilinna at iki.fi Wed Mar 28 19:53:22 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Wed, 28 Mar 2018 20:53:22 +0300 Subject: [PATCH 2/2] aarch64/assembly: only use the lower 32 bit of an int parameters In-Reply-To: <152225959773.17658.6096975446990782871.stgit@localhost.localdomain> References: <152225959773.17658.6096975446990782871.stgit@localhost.localdomain> Message-ID: <152225960280.17658.4754419425065114668.stgit@localhost.localdomain> * cipher/camellia-aarch64.S (_gcry_camellia_arm_encrypt_block) (__gcry_camellia_arm_decrypt_block): Make comment section about input registers match usage. * cipher/rijndael-armv8-aarch64-ce.S (_gcry_aes_ocb_auth_armv8_ce): Use 'w12' and 'w7' instead of 'x12' and 'x7'. (_gcry_aes_xts_enc_armv8_ce, _gcry_aes_xts_dec_armv8_ce): Fix function prototype in comments. * mpi/aarch64/mpih-add1.S: Use 32-bit registers for 32-bit mpi_size_t parameters. * mpi/aarch64/mpih-mul1.S: Ditto. * mpi/aarch64/mpih-mul2.S: Ditto. * mpi/aarch64/mpih-mul3.S: Ditto. * mpi/aarch64/mpih-sub1.S: Ditto. 
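(Background for the register change, with an illustrative pair rather than lines from the patch: under the AArch64 procedure call standard, a caller passing a 32-bit argument such as mpi_size_t only defines the low 32 bits of the argument register; the upper half may hold garbage. Testing the full 64-bit alias can therefore mis-branch:

	cbz	x2, .Lend	/* wrong: upper 32 bits of x2 are undefined */
	cbz	w2, .Lend	/* right: tests only the defined low half */

Hence the switch from x-register to w-register forms throughout the size-counter handling below.)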
-- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index c3cc463d5..b0e9a0335 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -206,7 +206,7 @@ _gcry_camellia_arm_encrypt_block: * x0: keytable * x1: dst * x2: src - * x3: keybitlen + * w3: keybitlen */ adr RTAB1, _gcry_camellia_arm_tables; @@ -252,7 +252,7 @@ _gcry_camellia_arm_decrypt_block: * x0: keytable * x1: dst * x2: src - * x3: keybitlen + * w3: keybitlen */ adr RTAB1, _gcry_camellia_arm_tables; diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S index 5859557ab..f0012c20a 100644 --- a/cipher/rijndael-armv8-aarch64-ce.S +++ b/cipher/rijndael-armv8-aarch64-ce.S @@ -1157,8 +1157,8 @@ _gcry_aes_ocb_auth_armv8_ce: * w6: nrounds => w7 * w7: blkn => w12 */ - mov x12, x7 - mov x7, x6 + mov w12, w7 + mov w7, w6 mov x6, x5 mov x5, x4 mov x4, x3 @@ -1280,7 +1280,9 @@ ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;) * void _gcry_aes_xts_enc_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, - * unsigned char *tweak, unsigned int nrounds); + * unsigned char *tweak, + * size_t nblocks, + * unsigned int nrounds); */ .align 3 @@ -1417,7 +1419,9 @@ ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;) * void _gcry_aes_xts_dec_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, - * unsigned char *tweak, unsigned int nrounds); + * unsigned char *tweak, + * size_t nblocks, + * unsigned int nrounds); */ .align 3 diff --git a/mpi/aarch64/mpih-add1.S b/mpi/aarch64/mpih-add1.S index 4ead1c23b..3370320e0 100644 --- a/mpi/aarch64/mpih-add1.S +++ b/mpi/aarch64/mpih-add1.S @@ -29,7 +29,7 @@ * _gcry_mpih_add_n( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 * mpi_ptr_t s2_ptr, x2 - * mpi_size_t size) x3 + * mpi_size_t size) w3 */ .text @@ -37,34 +37,34 @@ .globl _gcry_mpih_add_n ELF(.type _gcry_mpih_add_n,%function) _gcry_mpih_add_n: - and x5, x3, #3; + and w5, w3, #3; adds xzr, xzr, xzr; /* clear carry flag */ - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x4, [x1], #8; - sub x3, x3, #1; + sub w3, w3, #1; ldr x11, [x2], #8; - and x5, x3, #3; + and w5, w3, #3; adcs x4, x4, x11; str x4, [x0], #8; - cbz x3, .Lend; - cbnz x5, .Loop; + cbz w3, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x4, x6, [x1], #16; ldp x5, x7, [x2], #16; ldp x8, x10, [x1], #16; ldp x9, x11, [x2], #16; - sub x3, x3, #4; + sub w3, w3, #4; adcs x4, x4, x5; adcs x6, x6, x7; adcs x8, x8, x9; adcs x10, x10, x11; stp x4, x6, [x0], #16; stp x8, x10, [x0], #16; - cbnz x3, .Large_loop; + cbnz w3, .Large_loop; .Lend: adc x0, xzr, xzr; diff --git a/mpi/aarch64/mpih-mul1.S b/mpi/aarch64/mpih-mul1.S index 8a8626936..8830845a7 100644 --- a/mpi/aarch64/mpih-mul1.S +++ b/mpi/aarch64/mpih-mul1.S @@ -28,7 +28,7 @@ * mpi_limb_t * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 - * mpi_size_t s1_size, x2 + * mpi_size_t s1_size, w2 * mpi_limb_t s2_limb) x3 */ @@ -37,27 +37,27 @@ .globl _gcry_mpih_mul_1 ELF(.type _gcry_mpih_mul_1,%function) _gcry_mpih_mul_1: - and x5, x2, #3; + and w5, w2, #3; mov x4, xzr; - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x5, [x1], #8; - sub x2, x2, #1; + sub w2, w2, #1; mul x9, x5, x3; umulh x10, x5, x3; - and x5, x2, #3; + and w5, w2, #3; adds x4, x4, x9; str x4, [x0], #8; adc x4, x10, xzr; - cbz x2, .Lend; - cbnz x5, .Loop; + cbz w2, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x5, x6, [x1]; - sub x2, x2, #4; + sub w2, w2, 
#4; mul x9, x5, x3; ldp x7, x8, [x1, #16]; @@ -89,7 +89,7 @@ _gcry_mpih_mul_1: str x4, [x0], #8; adc x4, x16, xzr; - cbnz x2, .Large_loop; + cbnz w2, .Large_loop; .Lend: mov x0, x4; diff --git a/mpi/aarch64/mpih-mul2.S b/mpi/aarch64/mpih-mul2.S index c7c08e5ab..5d736990e 100644 --- a/mpi/aarch64/mpih-mul2.S +++ b/mpi/aarch64/mpih-mul2.S @@ -28,7 +28,7 @@ * mpi_limb_t * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 - * mpi_size_t s1_size, x2 + * mpi_size_t s1_size, w2 * mpi_limb_t s2_limb) x3 */ @@ -37,11 +37,11 @@ .globl _gcry_mpih_addmul_1 ELF(.type _gcry_mpih_addmul_1,%function) _gcry_mpih_addmul_1: - and x5, x2, #3; + and w5, w2, #3; mov x6, xzr; mov x7, xzr; - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x5, [x1], #8; @@ -49,21 +49,21 @@ _gcry_mpih_addmul_1: mul x12, x5, x3; ldr x4, [x0]; umulh x13, x5, x3; - sub x2, x2, #1; + sub w2, w2, #1; adds x12, x12, x4; - and x5, x2, #3; + and w5, w2, #3; adc x13, x13, x7; adds x12, x12, x6; str x12, [x0], #8; adc x6, x7, x13; - cbz x2, .Lend; - cbnz x5, .Loop; + cbz w2, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x5, x9, [x1], #16; - sub x2, x2, #4; + sub w2, w2, #4; ldp x4, x8, [x0]; mul x12, x5, x3; @@ -101,7 +101,7 @@ _gcry_mpih_addmul_1: str x14, [x0], #8; adc x6, x7, x15; - cbnz x2, .Large_loop; + cbnz w2, .Large_loop; .Lend: mov x0, x6; diff --git a/mpi/aarch64/mpih-mul3.S b/mpi/aarch64/mpih-mul3.S index ccc961e62..f785e5e42 100644 --- a/mpi/aarch64/mpih-mul3.S +++ b/mpi/aarch64/mpih-mul3.S @@ -28,7 +28,7 @@ * mpi_limb_t * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 - * mpi_size_t s1_size, x2 + * mpi_size_t s1_size, w2 * mpi_limb_t s2_limb) x3 */ @@ -37,9 +37,9 @@ .globl _gcry_mpih_submul_1 ELF(.type _gcry_mpih_submul_1,%function) _gcry_mpih_submul_1: - and x5, x2, #3; + and w5, w2, #3; mov x7, xzr; - cbz x5, .Large_loop; + cbz w5, .Large_loop; subs xzr, xzr, xzr; @@ -47,26 +47,26 @@ _gcry_mpih_submul_1: ldr x4, [x1], #8; cinc x7, x7, cc; ldr x5, [x0]; - sub x2, x2, #1; + sub w2, w2, #1; mul x6, x4, x3; subs x5, x5, x7; umulh x4, x4, x3; - and x10, x2, #3; + and w10, w2, #3; cset x7, cc; subs x5, x5, x6; add x7, x7, x4; str x5, [x0], #8; - cbz x2, .Loop_end; - cbnz x10, .Loop; + cbz w2, .Loop_end; + cbnz w10, .Loop; cinc x7, x7, cc; .Large_loop: ldp x4, x8, [x1], #16; - sub x2, x2, #4; + sub w2, w2, #4; ldp x5, x9, [x0]; mul x6, x4, x3; @@ -111,7 +111,7 @@ _gcry_mpih_submul_1: str x9, [x0], #8; cinc x7, x7, cc; - cbnz x2, .Large_loop; + cbnz w2, .Large_loop; mov x0, x7; ret; diff --git a/mpi/aarch64/mpih-sub1.S b/mpi/aarch64/mpih-sub1.S index 4a663732d..45a7b0417 100644 --- a/mpi/aarch64/mpih-sub1.S +++ b/mpi/aarch64/mpih-sub1.S @@ -29,7 +29,7 @@ * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 * mpi_ptr_t s2_ptr, x2 - * mpi_size_t size) x3 + * mpi_size_t size) w3 */ .text @@ -37,34 +37,34 @@ .globl _gcry_mpih_sub_n ELF(.type _gcry_mpih_sub_n,%function) _gcry_mpih_sub_n: - and x5, x3, #3; + and w5, w3, #3; subs xzr, xzr, xzr; /* prepare carry flag for sub */ - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x4, [x1], #8; - sub x3, x3, #1; + sub w3, w3, #1; ldr x11, [x2], #8; - and x5, x3, #3; + and w5, w3, #3; sbcs x4, x4, x11; str x4, [x0], #8; - cbz x3, .Lend; - cbnz x5, .Loop; + cbz w3, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x4, x6, [x1], #16; ldp x5, x7, [x2], #16; ldp x8, x10, [x1], #16; ldp x9, x11, [x2], #16; - sub x3, x3, #4; + sub w3, w3, #4; sbcs x4, x4, x5; sbcs x6, x6, x7; sbcs x8, x8, x9; sbcs x10, x10, x11; stp x4, x6, [x0], #16; stp x8, x10, [x0], 
#16; - cbnz x3, .Large_loop; + cbnz w3, .Large_loop; .Lend: cset x0, cc; From jussi.kivilinna at iki.fi Wed Mar 28 20:04:20 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Wed, 28 Mar 2018 21:04:20 +0300 Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: <1521754360-5806-1-git-send-email-martin@martin.st> References: <1521754360-5806-1-git-send-email-martin@martin.st> Message-ID: <22b0c05a-6753-2246-6640-6379f404ca53@iki.fi> Hello, On 22.03.2018 23:32, Martin Storsjö wrote: > * random/rndw32.c: Change _WIN64 ifdef into __x86_64__. > -- > > This fixes building this file for windows on aarch64. > Thanks, all patches applied and pushed. -Jussi From martin at martin.st Wed Mar 28 21:37:43 2018 From: martin at martin.st (=?ISO-8859-15?Q?Martin_Storsj=F6?=) Date: Wed, 28 Mar 2018 22:37:43 +0300 (EEST) Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: <22b0c05a-6753-2246-6640-6379f404ca53@iki.fi> References: <1521754360-5806-1-git-send-email-martin@martin.st> <22b0c05a-6753-2246-6640-6379f404ca53@iki.fi> Message-ID: On Wed, 28 Mar 2018, Jussi Kivilinna wrote: > Hello, > > On 22.03.2018 23:32, Martin Storsjö wrote: >> * random/rndw32.c: Change _WIN64 ifdef into __x86_64__. >> -- >> >> This fixes building this file for windows on aarch64. >> > > Thanks, all patches applied and pushed. Thanks! // Martin From martin at martin.st Wed Mar 28 21:39:42 2018 From: martin at martin.st (=?ISO-8859-15?Q?Martin_Storsj=F6?=) Date: Wed, 28 Mar 2018 22:39:42 +0300 (EEST) Subject: [PATCH 2/2] aarch64/assembly: only use the lower 32 bit of an int parameters In-Reply-To: <152225960280.17658.4754419425065114668.stgit@localhost.localdomain> References: <152225959773.17658.6096975446990782871.stgit@localhost.localdomain> <152225960280.17658.4754419425065114668.stgit@localhost.localdomain> Message-ID: On Wed, 28 Mar 2018, Jussi Kivilinna wrote: > * cipher/camellia-aarch64.S (_gcry_camellia_arm_encrypt_block) > (__gcry_camellia_arm_decrypt_block): Make comment section about input > registers match usage. > * cipher/rijndael-armv8-aarch64-ce.S (_gcry_aes_ocb_auth_armv8_ce): Use > 'w12' and 'w7' instead of 'x12' and 'x7'. > (_gcry_aes_xts_enc_armv8_ce, _gcry_aes_xts_dec_armv8_ce): Fix function > prototype in comments. > * mpi/aarch64/mpih-add1.S: Use 32-bit registers for 32-bit mpi_size_t > parameters. > * mpi/aarch64/mpih-mul1.S: Ditto. > * mpi/aarch64/mpih-mul2.S: Ditto. > * mpi/aarch64/mpih-mul3.S: Ditto. > * mpi/aarch64/mpih-sub1.S: Ditto. > -- > > Signed-off-by: Jussi Kivilinna > --- Both patches look good to me, for what it's worth. // Martin From cvs at cvs.gnupg.org Wed Mar 28 19:40:21 2018 From: cvs at cvs.gnupg.org (by Martin Storsjö) Date: Wed, 28 Mar 2018 19:40:21 +0200 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-57-g0de2191 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library".
The branch, master has been updated via 0de2191a07d69ef1fa34ca4c5d5fc4985ff7b4c4 (commit) via 4e1b628f492643d4e9b830bcdab7b49daaec5854 (commit) via 36e916fc332eda74963192b1c0bf6860a3e5d67b (commit) via ec0a2f25c0f64a7b65b373508ce9081e10461965 (commit) via ed41d6d6fb4551342b22ef763de1bd60e964e186 (commit) via 8ee38806245ca8452051b1a245f44082323f37f6 (commit) from 885f031fbd17abc1c0fedbb98df22823b647fc11 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 0de2191a07d69ef1fa34ca4c5d5fc4985ff7b4c4 Author: Martin Storsj? Date: Thu Mar 22 23:32:40 2018 +0200 aarch64: Enable building the aarch64 cipher assembly for windows * cipher/asm-common-aarch64.h: New. * cipher/camellia-aarch64.S: Use ELF macro, use x19 instead of x18. * cipher/chacha20-aarch64.S: Use ELF macro, don't use GOT on windows. * cipher/cipher-gcm-armv8-aarch64-ce.S: Use ELF macro. * cipher/rijndael-aarch64.S: Use ELF macro. * cipher/rijndael-armv8-aarch64-ce.S: Use ELF macro. * cipher/sha1-armv8-aarch64-ce.S: Use ELF macro. * cipher/sha256-armv8-aarch64-ce.S: Use ELF macro. * cipher/twofish-aarch64.S: Use ELF macro. * configure.ac: Don't require .size and .type in aarch64 assembly check. -- Don't require .type and .size in configure; we can make them optional via a preprocessor macro. This is mostly a mechanical change, wrapping the .type and .size directives in an ELF() macro, with two actual manual changes: (when targeting windows): - Don't load global symbols via a GOT (in chacha20) - Don't use the x18 register (in camellia); back up and restore x19 in the prologue/epilogue and use that instead. x18 is a platform specific register; on linux, it's free to be used by user code, while it's reserved for platform use on windows and darwin. Always use x19 instead of x18 for consistency. Signed-off-by: Martin Storsj? diff --git a/cipher/asm-common-aarch64.h b/cipher/asm-common-aarch64.h new file mode 100644 index 0000000..814b7ad --- /dev/null +++ b/cipher/asm-common-aarch64.h @@ -0,0 +1,32 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsj? + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef GCRY_ASM_COMMON_AARCH64_H +#define GCRY_ASM_COMMON_AARCH64_H + +#include + +#ifdef __ELF__ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* GCRY_ASM_COMMON_AARCH64_H */ diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 68d2a7d..c3cc463 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -19,7 +19,7 @@ * License along with this program; if not, see . 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -55,12 +55,12 @@ #define RT0 w15 #define RT1 w16 #define RT2 w17 -#define RT3 w18 +#define RT3 w19 #define xRT0 x15 #define xRT1 x16 #define xRT2 x17 -#define xRT3 x18 +#define xRT3 x19 #ifdef __AARCH64EL__ #define host_to_be(reg, rtmp) \ @@ -198,9 +198,10 @@ str_output_be(RDST, YL, YR, XL, XR, RT0, RT1); .globl _gcry_camellia_arm_encrypt_block -.type _gcry_camellia_arm_encrypt_block, at function; +ELF(.type _gcry_camellia_arm_encrypt_block, at function;) _gcry_camellia_arm_encrypt_block: + stp x19, x30, [sp, #-16]! /* input: * x0: keytable * x1: dst @@ -227,6 +228,7 @@ _gcry_camellia_arm_encrypt_block: outunpack(24); + ldp x19, x30, [sp], #16 ret; .ltorg @@ -236,14 +238,16 @@ _gcry_camellia_arm_encrypt_block: outunpack(32); + ldp x19, x30, [sp], #16 ret; .ltorg -.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block; +ELF(.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;) .globl _gcry_camellia_arm_decrypt_block -.type _gcry_camellia_arm_decrypt_block, at function; +ELF(.type _gcry_camellia_arm_decrypt_block, at function;) _gcry_camellia_arm_decrypt_block: + stp x19, x30, [sp, #-16]! /* input: * x0: keytable * x1: dst @@ -271,6 +275,7 @@ _gcry_camellia_arm_decrypt_block: outunpack(0); + ldp x19, x30, [sp], #16 ret; .ltorg @@ -281,11 +286,11 @@ _gcry_camellia_arm_decrypt_block: b .Ldec_128; .ltorg -.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block; +ELF(.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;) /* Encryption/Decryption tables */ .globl _gcry_camellia_arm_tables -.type _gcry_camellia_arm_tables, at object; +ELF(.type _gcry_camellia_arm_tables, at object;) .balign 32 _gcry_camellia_arm_tables: .Lcamellia_sp1110: @@ -551,7 +556,7 @@ _gcry_camellia_arm_tables: .long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4 .long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7 .long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e -.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables; +ELF(.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 5990a08..3844d4e 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -27,7 +27,7 @@ * Public domain. 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -38,9 +38,15 @@ .text +#ifdef _WIN32 +#define GET_DATA_POINTER(reg, name) \ + adrp reg, name ; \ + add reg, reg, #:lo12:name ; +#else #define GET_DATA_POINTER(reg, name) \ adrp reg, :got:name ; \ ldr reg, [reg, #:got_lo12:name] ; +#endif /* register macros */ #define INPUT x0 @@ -148,7 +154,7 @@ chacha20_data: .align 3 .globl _gcry_chacha20_aarch64_blocks4 -.type _gcry_chacha20_aarch64_blocks4,%function; +ELF(.type _gcry_chacha20_aarch64_blocks4,%function;) _gcry_chacha20_aarch64_blocks4: /* input: @@ -303,6 +309,6 @@ _gcry_chacha20_aarch64_blocks4: eor x0, x0, x0 ret -.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4; +ELF(.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4;) #endif diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S index 0cfaf1c..b6c4f59 100644 --- a/cipher/cipher-gcm-armv8-aarch64-ce.S +++ b/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -174,7 +174,7 @@ gcry_gcm_reduction_constant: */ .align 3 .globl _gcry_ghash_armv8_ce_pmull -.type _gcry_ghash_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_armv8_ce_pmull,%function;) _gcry_ghash_armv8_ce_pmull: /* input: * x0: gcm_key @@ -360,7 +360,7 @@ _gcry_ghash_armv8_ce_pmull: .Ldo_nothing: mov x0, #0 ret -.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull; +ELF(.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull;) /* @@ -368,7 +368,7 @@ _gcry_ghash_armv8_ce_pmull: */ .align 3 .globl _gcry_ghash_setup_armv8_ce_pmull -.type _gcry_ghash_setup_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_setup_armv8_ce_pmull,%function;) _gcry_ghash_setup_armv8_ce_pmull: /* input: * x0: gcm_key @@ -408,6 +408,6 @@ _gcry_ghash_setup_armv8_ce_pmull: st1 {rh5.16b-rh6.16b}, [x1] ret -.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull; +ELF(.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull;) #endif diff --git a/cipher/rijndael-aarch64.S b/cipher/rijndael-aarch64.S index e533bbe..aad7487 100644 --- a/cipher/rijndael-aarch64.S +++ b/cipher/rijndael-aarch64.S @@ -18,7 +18,7 @@ * License along with this program; if not, see . 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -206,7 +206,7 @@ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_encrypt_block -.type _gcry_aes_arm_encrypt_block,%function; +ELF(.type _gcry_aes_arm_encrypt_block,%function;) _gcry_aes_arm_encrypt_block: /* input: @@ -285,7 +285,7 @@ _gcry_aes_arm_encrypt_block: lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD); b .Lenc_done; -.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block; +ELF(.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;) #define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \ @@ -429,7 +429,7 @@ _gcry_aes_arm_encrypt_block: addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_decrypt_block -.type _gcry_aes_arm_decrypt_block,%function; +ELF(.type _gcry_aes_arm_decrypt_block,%function;) _gcry_aes_arm_decrypt_block: /* input: @@ -504,7 +504,7 @@ _gcry_aes_arm_decrypt_block: decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); b .Ldec_tail; -.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block; +ELF(.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__ */ diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S index 40097a7..5859557 100644 --- a/cipher/rijndael-armv8-aarch64-ce.S +++ b/cipher/rijndael-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -239,7 +239,7 @@ */ .align 3 .globl _gcry_aes_enc_armv8_ce -.type _gcry_aes_enc_armv8_ce,%function; +ELF(.type _gcry_aes_enc_armv8_ce,%function;) _gcry_aes_enc_armv8_ce: /* input: * x0: keysched @@ -291,7 +291,7 @@ _gcry_aes_enc_armv8_ce: CLEAR_REG(vk13) CLEAR_REG(vk14) b .Lenc1_tail -.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce; +ELF(.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce;) /* @@ -301,7 +301,7 @@ _gcry_aes_enc_armv8_ce: */ .align 3 .globl _gcry_aes_dec_armv8_ce -.type _gcry_aes_dec_armv8_ce,%function; +ELF(.type _gcry_aes_dec_armv8_ce,%function;) _gcry_aes_dec_armv8_ce: /* input: * x0: keysched @@ -353,7 +353,7 @@ _gcry_aes_dec_armv8_ce: CLEAR_REG(vk13) CLEAR_REG(vk14) b .Ldec1_tail -.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce; +ELF(.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce;) /* @@ -366,7 +366,7 @@ _gcry_aes_dec_armv8_ce: .align 3 .globl _gcry_aes_cbc_enc_armv8_ce -.type _gcry_aes_cbc_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_enc_armv8_ce,%function;) _gcry_aes_cbc_enc_armv8_ce: /* input: * x0: keysched @@ -419,7 +419,7 @@ _gcry_aes_cbc_enc_armv8_ce: .Lcbc_enc_skip: ret -.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce; +ELF(.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce;) /* * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched, @@ -430,7 +430,7 @@ _gcry_aes_cbc_enc_armv8_ce: .align 3 .globl _gcry_aes_cbc_dec_armv8_ce -.type _gcry_aes_cbc_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_dec_armv8_ce,%function;) _gcry_aes_cbc_dec_armv8_ce: /* input: * x0: keysched @@ -515,7 +515,7 @@ _gcry_aes_cbc_dec_armv8_ce: .Lcbc_dec_skip: ret -.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce; +ELF(.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce;) /* @@ -527,7 +527,7 @@ 
_gcry_aes_cbc_dec_armv8_ce: .align 3 .globl _gcry_aes_ctr_enc_armv8_ce -.type _gcry_aes_ctr_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ctr_enc_armv8_ce,%function;) _gcry_aes_ctr_enc_armv8_ce: /* input: * r0: keysched @@ -669,7 +669,7 @@ _gcry_aes_ctr_enc_armv8_ce: .Lctr_enc_skip: ret -.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce; +ELF(.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce;) /* @@ -681,7 +681,7 @@ _gcry_aes_ctr_enc_armv8_ce: .align 3 .globl _gcry_aes_cfb_enc_armv8_ce -.type _gcry_aes_cfb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_enc_armv8_ce,%function;) _gcry_aes_cfb_enc_armv8_ce: /* input: * r0: keysched @@ -732,7 +732,7 @@ _gcry_aes_cfb_enc_armv8_ce: .Lcfb_enc_skip: ret -.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce; +ELF(.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce;) /* @@ -744,7 +744,7 @@ _gcry_aes_cfb_enc_armv8_ce: .align 3 .globl _gcry_aes_cfb_dec_armv8_ce -.type _gcry_aes_cfb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_dec_armv8_ce,%function;) _gcry_aes_cfb_dec_armv8_ce: /* input: * r0: keysched @@ -829,7 +829,7 @@ _gcry_aes_cfb_dec_armv8_ce: .Lcfb_dec_skip: ret -.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce; +ELF(.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce;) /* @@ -846,7 +846,7 @@ _gcry_aes_cfb_dec_armv8_ce: .align 3 .globl _gcry_aes_ocb_enc_armv8_ce -.type _gcry_aes_ocb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_enc_armv8_ce,%function;) _gcry_aes_ocb_enc_armv8_ce: /* input: * x0: keysched @@ -979,7 +979,7 @@ _gcry_aes_ocb_enc_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce; +ELF(.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce;) /* @@ -996,7 +996,7 @@ _gcry_aes_ocb_enc_armv8_ce: .align 3 .globl _gcry_aes_ocb_dec_armv8_ce -.type _gcry_aes_ocb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_dec_armv8_ce,%function;) _gcry_aes_ocb_dec_armv8_ce: /* input: * x0: keysched @@ -1129,7 +1129,7 @@ _gcry_aes_ocb_dec_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce; +ELF(.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce;) /* @@ -1145,7 +1145,7 @@ _gcry_aes_ocb_dec_armv8_ce: .align 3 .globl _gcry_aes_ocb_auth_armv8_ce -.type _gcry_aes_ocb_auth_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_auth_armv8_ce,%function;) _gcry_aes_ocb_auth_armv8_ce: /* input: * x0: keysched @@ -1273,7 +1273,7 @@ _gcry_aes_ocb_auth_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce; +ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;) /* @@ -1285,7 +1285,7 @@ _gcry_aes_ocb_auth_armv8_ce: .align 3 .globl _gcry_aes_xts_enc_armv8_ce -.type _gcry_aes_xts_enc_armv8_ce,%function; +ELF(.type _gcry_aes_xts_enc_armv8_ce,%function;) _gcry_aes_xts_enc_armv8_ce: /* input: * r0: keysched @@ -1410,7 +1410,7 @@ _gcry_aes_xts_enc_armv8_ce: .Lxts_enc_skip: ret -.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce; +ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;) /* @@ -1422,7 +1422,7 @@ _gcry_aes_xts_enc_armv8_ce: .align 3 .globl _gcry_aes_xts_dec_armv8_ce -.type _gcry_aes_xts_dec_armv8_ce,%function; +ELF(.type _gcry_aes_xts_dec_armv8_ce,%function;) _gcry_aes_xts_dec_armv8_ce: /* input: * r0: keysched @@ -1547,7 +1547,7 @@ _gcry_aes_xts_dec_armv8_ce: .Lxts_dec_skip: ret -.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce; +ELF(.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;) /* @@ 
-1555,7 +1555,7 @@ _gcry_aes_xts_dec_armv8_ce: */ .align 3 .globl _gcry_aes_sbox4_armv8_ce -.type _gcry_aes_sbox4_armv8_ce,%function; +ELF(.type _gcry_aes_sbox4_armv8_ce,%function;) _gcry_aes_sbox4_armv8_ce: /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in * Cryptology - CT-RSA 2015" for details. @@ -1568,7 +1568,7 @@ _gcry_aes_sbox4_armv8_ce: mov w0, v0.S[0] CLEAR_REG(v0) ret -.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce; +ELF(.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;) /* @@ -1576,13 +1576,13 @@ _gcry_aes_sbox4_armv8_ce: */ .align 3 .globl _gcry_aes_invmixcol_armv8_ce -.type _gcry_aes_invmixcol_armv8_ce,%function; +ELF(.type _gcry_aes_invmixcol_armv8_ce,%function;) _gcry_aes_invmixcol_armv8_ce: ld1 {v0.16b}, [x1] aesimc v0.16b, v0.16b st1 {v0.16b}, [x0] CLEAR_REG(v0) ret -.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce; +ELF(.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;) #endif diff --git a/cipher/sha1-armv8-aarch64-ce.S b/cipher/sha1-armv8-aarch64-ce.S index ec1810d..aeb67a1 100644 --- a/cipher/sha1-armv8-aarch64-ce.S +++ b/cipher/sha1-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -103,7 +103,7 @@ gcry_sha1_aarch64_ce_K_VEC: */ .align 3 .globl _gcry_sha1_transform_armv8_ce -.type _gcry_sha1_transform_armv8_ce,%function; +ELF(.type _gcry_sha1_transform_armv8_ce,%function;) _gcry_sha1_transform_armv8_ce: /* input: * x0: ctx, CTX @@ -199,6 +199,6 @@ _gcry_sha1_transform_armv8_ce: .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce; +ELF(.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce;) #endif diff --git a/cipher/sha256-armv8-aarch64-ce.S b/cipher/sha256-armv8-aarch64-ce.S index a4575da..6b3ad32 100644 --- a/cipher/sha256-armv8-aarch64-ce.S +++ b/cipher/sha256-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -113,7 +113,7 @@ gcry_sha256_aarch64_ce_K: */ .align 3 .globl _gcry_sha256_transform_armv8_ce -.type _gcry_sha256_transform_armv8_ce,%function; +ELF(.type _gcry_sha256_transform_armv8_ce,%function;) _gcry_sha256_transform_armv8_ce: /* input: * r0: ctx, CTX @@ -213,6 +213,6 @@ _gcry_sha256_transform_armv8_ce: .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce; +ELF(.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce;) #endif diff --git a/cipher/twofish-aarch64.S b/cipher/twofish-aarch64.S index 99c4675..adee412 100644 --- a/cipher/twofish-aarch64.S +++ b/cipher/twofish-aarch64.S @@ -18,7 +18,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -217,7 +217,7 @@ ror1(RD); .globl _gcry_twofish_arm_encrypt_block -.type _gcry_twofish_arm_encrypt_block,%function; +ELF(.type _gcry_twofish_arm_encrypt_block,%function;) _gcry_twofish_arm_encrypt_block: /* input: @@ -263,10 +263,10 @@ _gcry_twofish_arm_encrypt_block: ret; .ltorg -.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block; +ELF(.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;) .globl _gcry_twofish_arm_decrypt_block -.type _gcry_twofish_arm_decrypt_block,%function; +ELF(.type _gcry_twofish_arm_decrypt_block,%function;) _gcry_twofish_arm_decrypt_block: /* input: @@ -311,7 +311,7 @@ _gcry_twofish_arm_decrypt_block: str_output_le(RDST, RA, RB, RC, RD, RT0, RT1); ret; -.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block; +ELF(.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/configure.ac b/configure.ac index b5d7211..330485f 100644 --- a/configure.ac +++ b/configure.ac @@ -1119,10 +1119,6 @@ AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly i "eor x0, x0, x30, ror #12;\n\t" "add x0, x0, x30, asr #12;\n\t" "eor v0.16b, v0.16b, v31.16b;\n\t" - - /* Test if '.type' and '.size' are supported. */ - ".size asmfunc,.-asmfunc;\n\t" - ".type asmfunc,@function;\n\t" );]])], [gcry_cv_gcc_aarch64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then commit 4e1b628f492643d4e9b830bcdab7b49daaec5854 Author: Martin Storsjö Date: Thu Mar 22 23:32:39 2018 +0200 aarch64: camellia: Only use the lower 32 bit of an int parameter * cipher/camellia-aarch64.S: Use 'w3' instead of 'x3'. -- The keybits parameter is declared as int, and in those cases, the upper half of a register is undefined, not guaranteed to be zero. Signed-off-by: Martin Storsjö diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 440f69f..68d2a7d 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -33,7 +33,7 @@ #define CTX x0 #define RDST x1 #define RSRC x2 -#define RKEYBITS x3 +#define RKEYBITS w3 #define RTAB1 x4 #define RTAB2 x5 commit 36e916fc332eda74963192b1c0bf6860a3e5d67b Author: Martin Storsjö Date: Thu Mar 22 23:32:38 2018 +0200 aarch64: Fix assembling chacha20-aarch64.S with clang/llvm * cipher/chacha20-aarch64.S: Remove superfluous lane counts. -- When referring to a specific lane, one doesn't need to specify the total number of lanes of the register. With GNU binutils, both forms are accepted, while clang/llvm rejects the form with the unnecessary number of lanes. Signed-off-by: Martin Storsjö
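For illustration, the two spellings differ only in whether the vector arrangement is repeated on the indexed operand; a lane reference needs the element size alone. A minimal stand-alone snippet (hypothetical, not taken from the patch) that broadcasts 32-bit lane 2 of v1 into all four lanes of v0:

    dup v0.4s, v1.s[2]     /* element size only: accepted by GNU as and clang */
    dup v0.4s, v1.4s[2]    /* redundant lane count: GNU as accepts, clang/llvm rejects */

Both lines encode the same DUP (element) instruction, which is why dropping the lane count is a pure syntax change with no effect on the generated code.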
diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 739ddde..5990a08 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -170,27 +170,27 @@ _gcry_chacha20_aarch64_blocks4: mov ROUND, #20; ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS]; - dup X12.4s, X15.4s[0]; - dup X13.4s, X15.4s[1]; + dup X12.4s, X15.s[0]; + dup X13.4s, X15.s[1]; ldr CTR, [INPUT_CTR]; add X12.4s, X12.4s, VCTR.4s; - dup X0.4s, VTMP1.4s[0]; - dup X1.4s, VTMP1.4s[1]; - dup X2.4s, VTMP1.4s[2]; - dup X3.4s, VTMP1.4s[3]; - dup X14.4s, X15.4s[2]; + dup X0.4s, VTMP1.s[0]; + dup X1.4s, VTMP1.s[1]; + dup X2.4s, VTMP1.s[2]; + dup X3.4s, VTMP1.s[3]; + dup X14.4s, X15.s[2]; cmhi VTMP0.4s, VCTR.4s, X12.4s; - dup X15.4s, X15.4s[3]; + dup X15.4s, X15.s[3]; add CTR, CTR, #4; /* Update counter */ - dup X4.4s, VTMP2.4s[0]; - dup X5.4s, VTMP2.4s[1]; - dup X6.4s, VTMP2.4s[2]; - dup X7.4s, VTMP2.4s[3]; + dup X4.4s, VTMP2.s[0]; + dup X5.4s, VTMP2.s[1]; + dup X6.4s, VTMP2.s[2]; + dup X7.4s, VTMP2.s[3]; sub X13.4s, X13.4s, VTMP0.4s; - dup X8.4s, VTMP3.4s[0]; - dup X9.4s, VTMP3.4s[1]; - dup X10.4s, VTMP3.4s[2]; - dup X11.4s, VTMP3.4s[3]; + dup X8.4s, VTMP3.s[0]; + dup X9.4s, VTMP3.s[1]; + dup X10.4s, VTMP3.s[2]; + dup X11.4s, VTMP3.s[3]; mov X12_TMP.16b, X12.16b; mov X13_TMP.16b, X13.16b; str CTR, [INPUT_CTR]; @@ -208,19 +208,19 @@ _gcry_chacha20_aarch64_blocks4: PLUS(X12, X12_TMP); /* INPUT + 12 * 4 + counter */ PLUS(X13, X13_TMP); /* INPUT + 13 * 4 + counter */ - dup VTMP2.4s, VTMP0.4s[0]; /* INPUT + 0 * 4 */ - dup VTMP3.4s, VTMP0.4s[1]; /* INPUT + 1 * 4 */ - dup X12_TMP.4s, VTMP0.4s[2]; /* INPUT + 2 * 4 */ - dup X13_TMP.4s, VTMP0.4s[3]; /* INPUT + 3 * 4 */ + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */ PLUS(X0, VTMP2); PLUS(X1, VTMP3); PLUS(X2, X12_TMP); PLUS(X3, X13_TMP); - dup VTMP2.4s, VTMP1.4s[0]; /* INPUT + 4 * 4 */ - dup VTMP3.4s, VTMP1.4s[1]; /* INPUT + 5 * 4 */ - dup X12_TMP.4s, VTMP1.4s[2]; /* INPUT + 6 * 4 */ - dup X13_TMP.4s, VTMP1.4s[3]; /* INPUT + 7 * 4 */ + dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */ + dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */ + dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */ + dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */ ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS]; mov INPUT_POS, INPUT; PLUS(X4, VTMP2); @@ -228,12 +228,12 @@ PLUS(X6, X12_TMP); PLUS(X7, X13_TMP); - dup VTMP2.4s, VTMP0.4s[0]; /* INPUT + 8 * 4 */ - dup VTMP3.4s, VTMP0.4s[1]; /* INPUT + 9 * 4 */ - dup X12_TMP.4s, VTMP0.4s[2]; /* INPUT + 10 * 4 */ - dup X13_TMP.4s, VTMP0.4s[3]; /* INPUT + 11 * 4 */ - dup VTMP0.4s, VTMP1.4s[2]; /* INPUT + 14 * 4 */ - dup VTMP1.4s, VTMP1.4s[3]; /* INPUT + 15 * 4 */ + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */ + dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */ + dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */ PLUS(X8, VTMP2); PLUS(X9, VTMP3); PLUS(X10, X12_TMP); commit ec0a2f25c0f64a7b65b373508ce9081e10461965 Author: Martin Storsjö Date: Thu Mar 22 23:32:37 2018 +0200 aarch64: mpi: Fix building the mpi aarch64 assembly for windows * mpi/aarch64/mpih-add1.S: Use ELF macro. * mpi/aarch64/mpih-mul1.S: Use ELF macro. * mpi/aarch64/mpih-mul2.S: Use ELF macro. * mpi/aarch64/mpih-mul3.S: Use ELF macro. * mpi/aarch64/mpih-sub1.S: Use ELF macro.
* mpi/asm-common-aarch64.h: New. -- The mpi aarch64 assembly is enabled as soon as the compiler supports inline assembly, without checking for .type and .size, as is done for the rest of the assembly in cipher/*.S. (The .type and .size directives are only supported on ELF.) Signed-off-by: Martin Storsjö diff --git a/mpi/aarch64/mpih-add1.S b/mpi/aarch64/mpih-add1.S index fa8cd01..4ead1c2 100644 --- a/mpi/aarch64/mpih-add1.S +++ b/mpi/aarch64/mpih-add1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_add_n -.type _gcry_mpih_add_n,%function +ELF(.type _gcry_mpih_add_n,%function) _gcry_mpih_add_n: and x5, x3, #3; adds xzr, xzr, xzr; /* clear carry flag */ @@ -68,4 +69,4 @@ _gcry_mpih_add_n: .Lend: adc x0, xzr, xzr; ret; -.size _gcry_mpih_add_n,.-_gcry_mpih_add_n; +ELF(.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;) diff --git a/mpi/aarch64/mpih-mul1.S b/mpi/aarch64/mpih-mul1.S index 65e98fe..8a86269 100644 --- a/mpi/aarch64/mpih-mul1.S +++ b/mpi/aarch64/mpih-mul1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_mul_1 -.type _gcry_mpih_mul_1,%function +ELF(.type _gcry_mpih_mul_1,%function) _gcry_mpih_mul_1: and x5, x2, #3; mov x4, xzr; @@ -93,4 +94,4 @@ _gcry_mpih_mul_1: .Lend: mov x0, x4; ret; -.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1; +ELF(.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;) diff --git a/mpi/aarch64/mpih-mul2.S b/mpi/aarch64/mpih-mul2.S index bd3b2c9..c7c08e5 100644 --- a/mpi/aarch64/mpih-mul2.S +++ b/mpi/aarch64/mpih-mul2.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_addmul_1 -.type _gcry_mpih_addmul_1,%function +ELF(.type _gcry_mpih_addmul_1,%function) _gcry_mpih_addmul_1: and x5, x2, #3; mov x6, xzr; @@ -105,4 +106,4 @@ _gcry_mpih_addmul_1: .Lend: mov x0, x6; ret; -.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1; +ELF(.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;) diff --git a/mpi/aarch64/mpih-mul3.S b/mpi/aarch64/mpih-mul3.S index a58bc53..ccc961e 100644 --- a/mpi/aarch64/mpih-mul3.S +++ b/mpi/aarch64/mpih-mul3.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_submul_1 -.type _gcry_mpih_submul_1,%function +ELF(.type _gcry_mpih_submul_1,%function) _gcry_mpih_submul_1: and x5, x2, #3; mov x7, xzr; @@ -118,4 +119,4 @@ _gcry_mpih_submul_1: .Loop_end: cinc x0, x7, cc; ret; -.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1; +ELF(.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;) diff --git a/mpi/aarch64/mpih-sub1.S b/mpi/aarch64/mpih-sub1.S index cbf2f08..4a66373 100644 --- a/mpi/aarch64/mpih-sub1.S +++ b/mpi/aarch64/mpih-sub1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_sub_n -.type _gcry_mpih_sub_n,%function +ELF(.type _gcry_mpih_sub_n,%function) _gcry_mpih_sub_n: and x5, x3, #3; subs xzr, xzr, xzr; /* prepare carry flag for sub */ @@ -68,4 +69,4 @@ _gcry_mpih_sub_n: .Lend: cset x0, cc; ret; -.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n; +ELF(.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;) diff --git a/mpi/asm-common-aarch64.h b/mpi/asm-common-aarch64.h new file mode 100644
index 0000000..1269413 --- /dev/null +++ b/mpi/asm-common-aarch64.h @@ -0,0 +1,30 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsjö + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPI_ASM_COMMON_AARCH64_H +#define MPI_ASM_COMMON_AARCH64_H + +#ifdef __ELF__ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* MPI_ASM_COMMON_AARCH64_H */ commit ed41d6d6fb4551342b22ef763de1bd60e964e186 Author: Martin Storsjö Date: Thu Mar 22 23:32:36 2018 +0200 random: Don't assume that _WIN64 implies x86_64 * random/rndw32.c: Change _WIN64 ifdef into __x86_64__. -- This fixes building this file for windows on aarch64. Signed-off-by: Martin Storsjö diff --git a/random/rndw32.c b/random/rndw32.c index 7e9ac50..08a8867 100644 --- a/random/rndw32.c +++ b/random/rndw32.c @@ -986,7 +986,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, On AMD64, TSC is always available and intrinsic is provided for accessing it. */ -#ifdef __WIN64__ +#ifdef __x86_64__ { unsigned __int64 aint64; @@ -1024,7 +1024,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, (*add) (&aword, sizeof (aword), origin ); } } -#endif /*__WIN64__*/ +#endif /*__x86_64__*/ } commit 8ee38806245ca8452051b1a245f44082323f37f6 Author: Jussi Kivilinna Date: Wed Mar 28 20:32:56 2018 +0300 Register DCO for Martin Storsjö -- Signed-off-by: Jussi Kivilinna diff --git a/AUTHORS b/AUTHORS index 8c553e6..49ab941 100644 --- a/AUTHORS +++ b/AUTHORS @@ -172,6 +172,9 @@ Jussi Kivilinna Markus Teich 2014-10-08:20141008180509.GA2770 at trolle: +Martin Storsjö +2018-03-28:dc1605ce-a47d-34c5-8851-d9569f9ea5d3 at martin.st: + Mathias L.
Baumann 2017-01-30:07c06d79-0828-b564-d604-fd16c7c86ebe at sociomantic.com: ----------------------------------------------------------------------- Summary of changes: AUTHORS | 3 ++ mpi/ec-ed25519.c => cipher/asm-common-aarch64.h | 29 +++++----- cipher/camellia-aarch64.S | 25 +++++---- cipher/chacha20-aarch64.S | 72 +++++++++++++------------ cipher/cipher-gcm-armv8-aarch64-ce.S | 10 ++-- cipher/rijndael-aarch64.S | 10 ++-- cipher/rijndael-armv8-aarch64-ce.S | 58 ++++++++++---------- cipher/sha1-armv8-aarch64-ce.S | 6 +-- cipher/sha256-armv8-aarch64-ce.S | 6 +-- cipher/twofish-aarch64.S | 10 ++-- configure.ac | 4 -- mpi/aarch64/mpih-add1.S | 5 +- mpi/aarch64/mpih-mul1.S | 5 +- mpi/aarch64/mpih-mul2.S | 5 +- mpi/aarch64/mpih-mul3.S | 5 +- mpi/aarch64/mpih-sub1.S | 5 +- mpi/{ec-internal.h => asm-common-aarch64.h} | 17 +++--- random/rndw32.c | 4 +- 18 files changed, 147 insertions(+), 132 deletions(-) copy mpi/ec-ed25519.c => cipher/asm-common-aarch64.h (68%) copy mpi/{ec-internal.h => asm-common-aarch64.h} (69%) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From cvs at cvs.gnupg.org Thu Mar 29 16:57:04 2018 From: cvs at cvs.gnupg.org (by Jussi Kivilinna) Date: Thu, 29 Mar 2018 16:57:04 +0200 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-59-g9b58e4a Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 9b58e4a03ba3aeff7bae3f40da706977870c9649 (commit) via 8cdb010f04528703a502344e00d52447de12547d (commit) from 0de2191a07d69ef1fa34ca4c5d5fc4985ff7b4c4 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 9b58e4a03ba3aeff7bae3f40da706977870c9649 Author: Jussi Kivilinna Date: Sat Mar 24 17:49:16 2018 +0200 aarch64/assembly: only use the lower 32 bit of an int parameters * cipher/camellia-aarch64.S (_gcry_camellia_arm_encrypt_block) (__gcry_camellia_arm_decrypt_block): Make comment section about input registers match usage. * cipher/rijndael-armv8-aarch64-ce.S (_gcry_aes_ocb_auth_armv8_ce): Use 'w12' and 'w7' instead of 'x12' and 'x7'. (_gcry_aes_xts_enc_armv8_ce, _gcry_aes_xts_dec_armv8_ce): Fix function prototype in comments. * mpi/aarch64/mpih-add1.S: Use 32-bit registers for 32-bit mpi_size_t parameters. * mpi/aarch64/mpih-mul1.S: Ditto. * mpi/aarch64/mpih-mul2.S: Ditto. * mpi/aarch64/mpih-mul3.S: Ditto. * mpi/aarch64/mpih-sub1.S: Ditto. 
-- Signed-off-by: Jussi Kivilinna diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index c3cc463..b0e9a03 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -206,7 +206,7 @@ _gcry_camellia_arm_encrypt_block: * x0: keytable * x1: dst * x2: src - * x3: keybitlen + * w3: keybitlen */ adr RTAB1, _gcry_camellia_arm_tables; @@ -252,7 +252,7 @@ _gcry_camellia_arm_decrypt_block: * x0: keytable * x1: dst * x2: src - * x3: keybitlen + * w3: keybitlen */ adr RTAB1, _gcry_camellia_arm_tables; diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S index 5859557..f0012c2 100644 --- a/cipher/rijndael-armv8-aarch64-ce.S +++ b/cipher/rijndael-armv8-aarch64-ce.S @@ -1157,8 +1157,8 @@ _gcry_aes_ocb_auth_armv8_ce: * w6: nrounds => w7 * w7: blkn => w12 */ - mov x12, x7 - mov x7, x6 + mov w12, w7 + mov w7, w6 mov x6, x5 mov x5, x4 mov x4, x3 @@ -1280,7 +1280,9 @@ ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;) * void _gcry_aes_xts_enc_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, - * unsigned char *tweak, unsigned int nrounds); + * unsigned char *tweak, + * size_t nblocks, + * unsigned int nrounds); */ .align 3 @@ -1417,7 +1419,9 @@ ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;) * void _gcry_aes_xts_dec_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, - * unsigned char *tweak, unsigned int nrounds); + * unsigned char *tweak, + * size_t nblocks, + * unsigned int nrounds); */ .align 3 diff --git a/mpi/aarch64/mpih-add1.S b/mpi/aarch64/mpih-add1.S index 4ead1c2..3370320 100644 --- a/mpi/aarch64/mpih-add1.S +++ b/mpi/aarch64/mpih-add1.S @@ -29,7 +29,7 @@ * _gcry_mpih_add_n( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 * mpi_ptr_t s2_ptr, x2 - * mpi_size_t size) x3 + * mpi_size_t size) w3 */ .text @@ -37,34 +37,34 @@ .globl _gcry_mpih_add_n ELF(.type _gcry_mpih_add_n,%function) _gcry_mpih_add_n: - and x5, x3, #3; + and w5, w3, #3; adds xzr, xzr, xzr; /* clear carry flag */ - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x4, [x1], #8; - sub x3, x3, #1; + sub w3, w3, #1; ldr x11, [x2], #8; - and x5, x3, #3; + and w5, w3, #3; adcs x4, x4, x11; str x4, [x0], #8; - cbz x3, .Lend; - cbnz x5, .Loop; + cbz w3, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x4, x6, [x1], #16; ldp x5, x7, [x2], #16; ldp x8, x10, [x1], #16; ldp x9, x11, [x2], #16; - sub x3, x3, #4; + sub w3, w3, #4; adcs x4, x4, x5; adcs x6, x6, x7; adcs x8, x8, x9; adcs x10, x10, x11; stp x4, x6, [x0], #16; stp x8, x10, [x0], #16; - cbnz x3, .Large_loop; + cbnz w3, .Large_loop; .Lend: adc x0, xzr, xzr; diff --git a/mpi/aarch64/mpih-mul1.S b/mpi/aarch64/mpih-mul1.S index 8a86269..8830845 100644 --- a/mpi/aarch64/mpih-mul1.S +++ b/mpi/aarch64/mpih-mul1.S @@ -28,7 +28,7 @@ * mpi_limb_t * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 - * mpi_size_t s1_size, x2 + * mpi_size_t s1_size, w2 * mpi_limb_t s2_limb) x3 */ @@ -37,27 +37,27 @@ .globl _gcry_mpih_mul_1 ELF(.type _gcry_mpih_mul_1,%function) _gcry_mpih_mul_1: - and x5, x2, #3; + and w5, w2, #3; mov x4, xzr; - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x5, [x1], #8; - sub x2, x2, #1; + sub w2, w2, #1; mul x9, x5, x3; umulh x10, x5, x3; - and x5, x2, #3; + and w5, w2, #3; adds x4, x4, x9; str x4, [x0], #8; adc x4, x10, xzr; - cbz x2, .Lend; - cbnz x5, .Loop; + cbz w2, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x5, x6, [x1]; - sub x2, x2, #4; + sub w2, w2, #4; mul x9, x5, x3; ldp x7, x8, [x1, 
#16]; @@ -89,7 +89,7 @@ _gcry_mpih_mul_1: str x4, [x0], #8; adc x4, x16, xzr; - cbnz x2, .Large_loop; + cbnz w2, .Large_loop; .Lend: mov x0, x4; diff --git a/mpi/aarch64/mpih-mul2.S b/mpi/aarch64/mpih-mul2.S index c7c08e5..5d73699 100644 --- a/mpi/aarch64/mpih-mul2.S +++ b/mpi/aarch64/mpih-mul2.S @@ -28,7 +28,7 @@ * mpi_limb_t * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 - * mpi_size_t s1_size, x2 + * mpi_size_t s1_size, w2 * mpi_limb_t s2_limb) x3 */ @@ -37,11 +37,11 @@ .globl _gcry_mpih_addmul_1 ELF(.type _gcry_mpih_addmul_1,%function) _gcry_mpih_addmul_1: - and x5, x2, #3; + and w5, w2, #3; mov x6, xzr; mov x7, xzr; - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x5, [x1], #8; @@ -49,21 +49,21 @@ _gcry_mpih_addmul_1: mul x12, x5, x3; ldr x4, [x0]; umulh x13, x5, x3; - sub x2, x2, #1; + sub w2, w2, #1; adds x12, x12, x4; - and x5, x2, #3; + and w5, w2, #3; adc x13, x13, x7; adds x12, x12, x6; str x12, [x0], #8; adc x6, x7, x13; - cbz x2, .Lend; - cbnz x5, .Loop; + cbz w2, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x5, x9, [x1], #16; - sub x2, x2, #4; + sub w2, w2, #4; ldp x4, x8, [x0]; mul x12, x5, x3; @@ -101,7 +101,7 @@ _gcry_mpih_addmul_1: str x14, [x0], #8; adc x6, x7, x15; - cbnz x2, .Large_loop; + cbnz w2, .Large_loop; .Lend: mov x0, x6; diff --git a/mpi/aarch64/mpih-mul3.S b/mpi/aarch64/mpih-mul3.S index ccc961e..f785e5e 100644 --- a/mpi/aarch64/mpih-mul3.S +++ b/mpi/aarch64/mpih-mul3.S @@ -28,7 +28,7 @@ * mpi_limb_t * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 - * mpi_size_t s1_size, x2 + * mpi_size_t s1_size, w2 * mpi_limb_t s2_limb) x3 */ @@ -37,9 +37,9 @@ .globl _gcry_mpih_submul_1 ELF(.type _gcry_mpih_submul_1,%function) _gcry_mpih_submul_1: - and x5, x2, #3; + and w5, w2, #3; mov x7, xzr; - cbz x5, .Large_loop; + cbz w5, .Large_loop; subs xzr, xzr, xzr; @@ -47,26 +47,26 @@ _gcry_mpih_submul_1: ldr x4, [x1], #8; cinc x7, x7, cc; ldr x5, [x0]; - sub x2, x2, #1; + sub w2, w2, #1; mul x6, x4, x3; subs x5, x5, x7; umulh x4, x4, x3; - and x10, x2, #3; + and w10, w2, #3; cset x7, cc; subs x5, x5, x6; add x7, x7, x4; str x5, [x0], #8; - cbz x2, .Loop_end; - cbnz x10, .Loop; + cbz w2, .Loop_end; + cbnz w10, .Loop; cinc x7, x7, cc; .Large_loop: ldp x4, x8, [x1], #16; - sub x2, x2, #4; + sub w2, w2, #4; ldp x5, x9, [x0]; mul x6, x4, x3; @@ -111,7 +111,7 @@ _gcry_mpih_submul_1: str x9, [x0], #8; cinc x7, x7, cc; - cbnz x2, .Large_loop; + cbnz w2, .Large_loop; mov x0, x7; ret; diff --git a/mpi/aarch64/mpih-sub1.S b/mpi/aarch64/mpih-sub1.S index 4a66373..45a7b04 100644 --- a/mpi/aarch64/mpih-sub1.S +++ b/mpi/aarch64/mpih-sub1.S @@ -29,7 +29,7 @@ * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 * mpi_ptr_t s2_ptr, x2 - * mpi_size_t size) x3 + * mpi_size_t size) w3 */ .text @@ -37,34 +37,34 @@ .globl _gcry_mpih_sub_n ELF(.type _gcry_mpih_sub_n,%function) _gcry_mpih_sub_n: - and x5, x3, #3; + and w5, w3, #3; subs xzr, xzr, xzr; /* prepare carry flag for sub */ - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x4, [x1], #8; - sub x3, x3, #1; + sub w3, w3, #1; ldr x11, [x2], #8; - and x5, x3, #3; + and w5, w3, #3; sbcs x4, x4, x11; str x4, [x0], #8; - cbz x3, .Lend; - cbnz x5, .Loop; + cbz w3, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x4, x6, [x1], #16; ldp x5, x7, [x2], #16; ldp x8, x10, [x1], #16; ldp x9, x11, [x2], #16; - sub x3, x3, #4; + sub w3, w3, #4; sbcs x4, x4, x5; sbcs x6, x6, x7; sbcs x8, x8, x9; sbcs x10, x10, x11; stp x4, x6, [x0], #16; stp x8, x10, [x0], #16; - cbnz x3, .Large_loop; + cbnz w3, 
.Large_loop; .Lend: cset x0, cc; commit 8cdb010f04528703a502344e00d52447de12547d Author: Jussi Kivilinna Date: Sat Mar 24 17:22:45 2018 +0200 poly1305: silence compiler warning on clang/aarch64 * cipher/poly1305.c (MUL_MOD_1305_64): cast zero constant to 64-bits. -- This patch fixes "value size does not match register size specified by the constraint and modifier [-Wasm-operand-widths]" warnings when building with clang/aarch64. Signed-off-by: Jussi Kivilinna diff --git a/cipher/poly1305.c b/cipher/poly1305.c index 68d9b90..571f828 100644 --- a/cipher/poly1305.c +++ b/cipher/poly1305.c @@ -130,7 +130,7 @@ static void poly1305_init (poly1305_context_t *ctx, /* carry propagation */ \ H2 = H0 & 3; \ H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \ - ADD_1305_64(H2, H1, H0, 0, x0_hi, x0_lo); \ + ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \ } while (0) unsigned int ----------------------------------------------------------------------- Summary of changes: cipher/camellia-aarch64.S | 4 ++-- cipher/poly1305.c | 2 +- cipher/rijndael-armv8-aarch64-ce.S | 12 ++++++++---- mpi/aarch64/mpih-add1.S | 18 +++++++++--------- mpi/aarch64/mpih-mul1.S | 18 +++++++++--------- mpi/aarch64/mpih-mul2.S | 18 +++++++++--------- mpi/aarch64/mpih-mul3.S | 18 +++++++++--------- mpi/aarch64/mpih-sub1.S | 18 +++++++++--------- 8 files changed, 56 insertions(+), 52 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits
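The poly1305 warning fixed above can be reduced to a small stand-alone case. The following C snippet is a hypothetical sketch in the spirit of ADD_1305_64, not the libgcrypt macro itself: binding a plain '0' (an int) to an inline-asm input operand that the template uses as a full 64-bit register makes clang on aarch64 emit "value size does not match register size specified by the constraint and modifier", while casting the constant to a 64-bit type, as the patch does, keeps both GCC and clang quiet.

    #include <stdint.h>

    typedef uint64_t u64;

    /* Add b_lo into the two-limb value a_hi:a_lo; return the low limb
     * and store the high limb through *hi. */
    static inline u64
    add_2limb (u64 *hi, u64 a_hi, u64 a_lo, u64 b_lo)
    {
      u64 h, l;
      __asm__ ("adds %0, %2, %3\n\t"  /* l = a_lo + b_lo, sets carry */
               "adc %1, %4, %5\n\t"   /* h = a_hi + 0 + carry */
               : "=&r" (l), "=r" (h)
               : "r" (a_lo), "r" (b_lo), "r" (a_hi),
                 "r" ((u64)0)         /* a plain 0 here would trigger
                                       * -Wasm-operand-widths on clang */
               : "cc");
      *hi = h;
      return l;
    }

GNU as happily widens the 32-bit operand when allocating the register for %5, which is why the warning only shows up with clang's integrated assembler and why the one-character cast is the whole fix.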