From jussi.kivilinna at iki.fi  Sun Mar 11 16:10:49 2018
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Sun, 11 Mar 2018 17:10:49 +0200
Subject: [PATCH 1/2] Improve constant-time buffer compare
Message-ID: <152078104967.30895.5002712088528772387.stgit@localhost.localdomain>

* cipher/bufhelp.h (buf_eq_const): Rewrite logic.
--

New implementation for constant-time buffer comparison that avoids
generating conditional code in the comparison loop.

Signed-off-by: Jussi Kivilinna
---
 0 files changed

diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h
index b854bc016..83d3f53aa 100644
--- a/cipher/bufhelp.h
+++ b/cipher/bufhelp.h
@@ -290,13 +290,19 @@ buf_eq_const(const void *_a, const void *_b, size_t len)
 {
   const byte *a = _a;
   const byte *b = _b;
-  size_t diff, i;
+  int ab, ba;
+  size_t i;
 
   /* Constant-time compare. */
-  for (i = 0, diff = 0; i < len; i++)
-    diff -= !!(a[i] - b[i]);
+  for (i = 0, ab = 0, ba = 0; i < len; i++)
+    {
+      /* If a[i] != b[i], either ab or ba will be negative. */
+      ab |= a[i] - b[i];
+      ba |= b[i] - a[i];
+    }
 
-  return !diff;
+  /* 'ab | ba' is negative when buffers are not equal. */
+  return (ab | ba) >= 0;
 }

From jussi.kivilinna at iki.fi  Sun Mar 11 16:10:54 2018
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Sun, 11 Mar 2018 17:10:54 +0200
Subject: [PATCH 2/2] _gcry_burn_stack: use memset for clearing memory
In-Reply-To: <152078104967.30895.5002712088528772387.stgit@localhost.localdomain>
References: <152078104967.30895.5002712088528772387.stgit@localhost.localdomain>
Message-ID: <152078105472.30895.16990019070381191769.stgit@localhost.localdomain>

* src/misc.c (__gcry_burn_stack) [HAVE_VLA]: Use 'memset' for clearing
  stack.
--

Patch switches stack burning to use the faster memset instead of
wipememory. Memset is accessed through a volatile function pointer, so
that the compiler will not optimize away the call.

Signed-off-by: Jussi Kivilinna
---
 0 files changed

diff --git a/src/misc.c b/src/misc.c
index 002a84f69..47d2dc712 100644
--- a/src/misc.c
+++ b/src/misc.c
@@ -501,11 +501,12 @@ void
 __gcry_burn_stack (unsigned int bytes)
 {
 #ifdef HAVE_VLA
+  static void *(*volatile memset_ptr)(void *, int, size_t) = (void *)memset;
   /* (bytes == 0 ? 1 : bytes) == (!bytes + bytes) */
   unsigned int buflen = ((!bytes + bytes) + 63) & ~63;
-  volatile char buf[buflen];
+  char buf[buflen];
 
-  wipememory (buf, sizeof buf);
+  memset_ptr (buf, 0, sizeof buf);
 #else
   volatile char buf[64];

From jussi.kivilinna at iki.fi  Sun Mar 11 16:11:12 2018
From: jussi.kivilinna at iki.fi (Jussi Kivilinna)
Date: Sun, 11 Mar 2018 17:11:12 +0200
Subject: [PATCH] bench-slope: add CPU frequency auto-detection
Message-ID: <152078107280.31205.13929557969371133343.stgit@localhost.localdomain>

* tests/bench-slope.c (bench_obj): Add 'hd'.
(bench_encrypt_init, bench_encrypt_free, bench_encrypt_do_bench)
(bench_decrypt_do_bench, bench_xts_encrypt_init)
(bench_xts_encrypt_do_bench, bench_xts_decrypt_do_bench)
(bench_ccm_encrypt_init, bench_ccm_encrypt_do_bench)
(bench_ccm_decrypt_do_bench, bench_aead_encrypt_init)
(bench_aead_encrypt_do_bench, bench_aead_decrypt_do_bench)
(bench_hash_init, bench_hash_free, bench_hash_do_bench)
(bench_mac_init, bench_mac_free, bench_mac_do_bench): Use 'obj->hd'
for storing pointer to crypto context.
(auto_ghz): New.
(do_slope_benchmark): Rename to...
(slope_benchmark): ...this.
(auto_ghz_init, auto_ghz_free, auto_ghz_bench, auto_ghz_detect_ops)
(get_auto_ghz, do_slope_benchmark): New.
(double_to_str): Round number larger than 1000 to integer.
(bench_print_result_csv, bench_print_result_std) (bench_print_result, bench_print_header, cipher_bench_one) (hash_bench_one, mac_bench_one, kdf_bench_one, kdf_bench): Add auto-detected frequency printing. (print_help): Help for CPU speed auto-detection mode. (main): Add parsing for "--cpu-mhz auto". -- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/tests/bench-slope.c b/tests/bench-slope.c index e34104f7b..5c64f229d 100644 --- a/tests/bench-slope.c +++ b/tests/bench-slope.c @@ -50,6 +50,9 @@ static int num_measurement_repetitions; results. */ static double cpu_ghz = -1; +/* Attempt to autodetect CPU Ghz. */ +static int auto_ghz; + /* Whether we are running as part of the regression test suite. */ static int in_regression_test; @@ -220,6 +223,7 @@ struct bench_obj unsigned int step_size; void *priv; + void *hd; }; typedef int (*const bench_initialize_t) (struct bench_obj * obj); @@ -383,7 +387,7 @@ adjust_loop_iterations_to_timer_accuracy (struct bench_obj *obj, void *buffer, /* Benchmark and return linear regression slope in nanoseconds per byte. */ double -do_slope_benchmark (struct bench_obj *obj) +slope_benchmark (struct bench_obj *obj) { unsigned int num_measurements; double *measurements = NULL; @@ -464,6 +468,122 @@ err_free: return -1; } +/********************************************* CPU frequency auto-detection. */ + +static int +auto_ghz_init (struct bench_obj *obj) +{ + obj->min_bufsize = 16; + obj->max_bufsize = 64 + obj->min_bufsize; + obj->step_size = 8; + obj->num_measure_repetitions = 16; + + return 0; +} + +static void +auto_ghz_free (struct bench_obj *obj) +{ + (void)obj; +} + +static void +auto_ghz_bench (struct bench_obj *obj, void *buf, size_t buflen) +{ + (void)obj; + (void)buf; + + buflen *= 1024; + + /* Turbo frequency detection benchmark. Without CPU turbo-boost, this + * function will give cycles/iteration result 1024.0 on high-end CPUs. + * With turbo, result will be less and can be used detect turbo-clock. */ + + do + { +#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY + /* Use memory barrier to prevent compiler from optimizing this loop + * away. */ + + asm volatile ("":::"memory"); +#else + /* TODO: Needs alternative way. */ +#endif + } + while (--buflen); +} + +static struct bench_ops auto_ghz_detect_ops = { + &auto_ghz_init, + &auto_ghz_free, + &auto_ghz_bench +}; + + +double +get_auto_ghz (void) +{ + struct bench_obj obj = { 0 }; + double nsecs_per_iteration; + double cycles_per_iteration; + + obj.ops = &auto_ghz_detect_ops; + + nsecs_per_iteration = slope_benchmark (&obj); + + cycles_per_iteration = nsecs_per_iteration * cpu_ghz; + + /* Adjust CPU Ghz so that cycles per iteration would give '1024.0'. */ + + return cpu_ghz * 1024 / cycles_per_iteration; +} + + +double +do_slope_benchmark (struct bench_obj *obj, double *bench_ghz) +{ + double ret; + + if (!auto_ghz) + { + /* Perform measurement without autodetection of CPU frequency. */ + + ret = slope_benchmark (obj); + + *bench_ghz = cpu_ghz; + } + else + { + double cpu_auto_ghz_before; + double cpu_auto_ghz_after; + double nsecs_per_iteration; + double diff; + + /* Perform measurement with CPU frequency autodetection. */ + + do + { + /* Repeat measurement until CPU turbo frequency has stabilized. */ + + cpu_auto_ghz_before = get_auto_ghz (); + + nsecs_per_iteration = slope_benchmark (obj); + + cpu_auto_ghz_after = get_auto_ghz (); + + diff = 1.0 - (cpu_auto_ghz_before / cpu_auto_ghz_after); + diff = diff < 0 ? 
-diff : diff; + } + while (diff > 5e-5); + + ret = nsecs_per_iteration; + + *bench_ghz = cpu_auto_ghz_after; + } + + return ret; +} + /********************************************************** Printing results. */ @@ -476,29 +596,34 @@ double_to_str (char *out, size_t outlen, double value) fmt = "%.3f"; else if (value < 100.0) fmt = "%.2f"; - else + else if (value < 1000.0) fmt = "%.1f"; + else + fmt = "%.0f"; snprintf (out, outlen, fmt, value); } static void -bench_print_result_csv (double nsecs_per_byte) +bench_print_result_csv (double nsecs_per_byte, double bench_ghz) { double cycles_per_byte, mbytes_per_sec; char nsecpbyte_buf[16]; char mbpsec_buf[16]; char cpbyte_buf[16]; + char mhz_buf[16]; *cpbyte_buf = 0; + *mhz_buf = 0; double_to_str (nsecpbyte_buf, sizeof (nsecpbyte_buf), nsecs_per_byte); /* If user didn't provide CPU speed, we cannot show cycles/byte results. */ - if (cpu_ghz > 0.0) + if (bench_ghz > 0.0) { - cycles_per_byte = nsecs_per_byte * cpu_ghz; + cycles_per_byte = nsecs_per_byte * bench_ghz; double_to_str (cpbyte_buf, sizeof (cpbyte_buf), cycles_per_byte); + double_to_str (mhz_buf, sizeof (mhz_buf), bench_ghz * 1000); } mbytes_per_sec = @@ -506,50 +631,76 @@ bench_print_result_csv (double nsecs_per_byte) double_to_str (mbpsec_buf, sizeof (mbpsec_buf), mbytes_per_sec); /* We print two empty fields to allow for future enhancements. */ - printf ("%s,%s,%s,,,%s,ns/B,%s,MiB/s,%s,c/B\n", - current_section_name, - current_algo_name? current_algo_name : "", - current_mode_name? current_mode_name : "", - nsecpbyte_buf, - mbpsec_buf, - cpbyte_buf); - + if (auto_ghz) + { + printf ("%s,%s,%s,,,%s,ns/B,%s,MiB/s,%s,c/B,%s,Mhz\n", + current_section_name, + current_algo_name? current_algo_name : "", + current_mode_name? current_mode_name : "", + nsecpbyte_buf, + mbpsec_buf, + cpbyte_buf, + mhz_buf); + } + else + { + printf ("%s,%s,%s,,,%s,ns/B,%s,MiB/s,%s,c/B\n", + current_section_name, + current_algo_name? current_algo_name : "", + current_mode_name? current_mode_name : "", + nsecpbyte_buf, + mbpsec_buf, + cpbyte_buf); + } } static void -bench_print_result_std (double nsecs_per_byte) +bench_print_result_std (double nsecs_per_byte, double bench_ghz) { double cycles_per_byte, mbytes_per_sec; char nsecpbyte_buf[16]; char mbpsec_buf[16]; char cpbyte_buf[16]; + char mhz_buf[16]; double_to_str (nsecpbyte_buf, sizeof (nsecpbyte_buf), nsecs_per_byte); /* If user didn't provide CPU speed, we cannot show cycles/byte results. 
*/ - if (cpu_ghz > 0.0) + if (bench_ghz > 0.0) { - cycles_per_byte = nsecs_per_byte * cpu_ghz; + cycles_per_byte = nsecs_per_byte * bench_ghz; double_to_str (cpbyte_buf, sizeof (cpbyte_buf), cycles_per_byte); + double_to_str (mhz_buf, sizeof (mhz_buf), bench_ghz * 1000); } else - strcpy (cpbyte_buf, "-"); + { + strcpy (cpbyte_buf, "-"); + strcpy (mhz_buf, "-"); + } mbytes_per_sec = (1000.0 * 1000.0 * 1000.0) / (nsecs_per_byte * 1024 * 1024); double_to_str (mbpsec_buf, sizeof (mbpsec_buf), mbytes_per_sec); - printf ("%9s ns/B %9s MiB/s %9s c/B\n", - nsecpbyte_buf, mbpsec_buf, cpbyte_buf); + if (auto_ghz) + { + printf ("%9s ns/B %9s MiB/s %9s c/B %9s\n", + nsecpbyte_buf, mbpsec_buf, cpbyte_buf, mhz_buf); + } + else + { + printf ("%9s ns/B %9s MiB/s %9s c/B\n", + nsecpbyte_buf, mbpsec_buf, cpbyte_buf); + } } static void -bench_print_result (double nsecs_per_byte) +bench_print_result (double nsecs_per_byte, double bench_ghz) { if (csv_mode) - bench_print_result_csv (nsecs_per_byte); + bench_print_result_csv (nsecs_per_byte, bench_ghz); else - bench_print_result_std (nsecs_per_byte); + bench_print_result_std (nsecs_per_byte, bench_ghz); } static void @@ -578,8 +729,13 @@ bench_print_header (int algo_width, const char *algo_name) printf (" %-*s | ", -algo_width, algo_name); else printf (" %-*s | ", algo_width, algo_name); - printf ("%14s %15s %13s\n", "nanosecs/byte", "mebibytes/sec", - "cycles/byte"); + + if (auto_ghz) + printf ("%14s %15s %13s %9s\n", "nanosecs/byte", "mebibytes/sec", + "cycles/byte", "auto Mhz"); + else + printf ("%14s %15s %13s\n", "nanosecs/byte", "mebibytes/sec", + "cycles/byte"); } } @@ -684,7 +840,7 @@ bench_encrypt_init (struct bench_obj *obj) exit (1); } - obj->priv = hd; + obj->hd = hd; return 0; } @@ -692,7 +848,7 @@ bench_encrypt_init (struct bench_obj *obj) static void bench_encrypt_free (struct bench_obj *obj) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; gcry_cipher_close (hd); } @@ -700,7 +856,7 @@ bench_encrypt_free (struct bench_obj *obj) static void bench_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; err = gcry_cipher_encrypt (hd, buf, buflen, buf, buflen); @@ -716,7 +872,7 @@ bench_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) static void bench_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; err = gcry_cipher_decrypt (hd, buf, buflen, buf, buflen); @@ -790,7 +946,7 @@ bench_xts_encrypt_init (struct bench_obj *obj) exit (1); } - obj->priv = hd; + obj->hd = hd; return 0; } @@ -798,7 +954,7 @@ bench_xts_encrypt_init (struct bench_obj *obj) static void bench_xts_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; unsigned int pos; static const char tweak[16] = { 0xff, 0xff, 0xfe, }; size_t sectorlen = obj->step_size; @@ -825,7 +981,7 @@ bench_xts_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) static void bench_xts_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; unsigned int pos; static const char tweak[16] = { 0xff, 0xff, 0xfe, }; size_t sectorlen = obj->step_size; @@ -865,7 +1021,7 @@ static struct bench_ops xts_decrypt_ops = { static void bench_ccm_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - 
gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[8]; char nonce[11] = { 0x80, 0x01, }; @@ -909,7 +1065,7 @@ bench_ccm_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) static void bench_ccm_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[8] = { 0, }; char nonce[11] = { 0x80, 0x01, }; @@ -956,7 +1112,7 @@ static void bench_ccm_authenticate_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[8] = { 0, }; char nonce[11] = { 0x80, 0x01, }; @@ -1030,7 +1186,7 @@ static void bench_aead_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen, const char *nonce, size_t noncelen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[16]; @@ -1060,7 +1216,7 @@ static void bench_aead_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen, const char *nonce, size_t noncelen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[16] = { 0, }; @@ -1093,7 +1249,7 @@ bench_aead_authenticate_do_bench (struct bench_obj *obj, void *buf, size_t buflen, const char *nonce, size_t noncelen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[16] = { 0, }; char data = 0xff; @@ -1360,6 +1516,7 @@ cipher_bench_one (int algo, struct bench_cipher_mode *pmode) struct bench_cipher_mode mode = *pmode; struct bench_obj obj = { 0 }; double result; + double bench_ghz; unsigned int blklen; mode.algo = algo; @@ -1404,9 +1561,9 @@ cipher_bench_one (int algo, struct bench_cipher_mode *pmode) obj.ops = mode.ops; obj.priv = &mode; - result = do_slope_benchmark (&obj); + result = do_slope_benchmark (&obj, &bench_ghz); - bench_print_result (result); + bench_print_result (result, bench_ghz); } @@ -1483,7 +1640,7 @@ bench_hash_init (struct bench_obj *obj) exit (1); } - obj->priv = hd; + obj->hd = hd; return 0; } @@ -1491,7 +1648,7 @@ bench_hash_init (struct bench_obj *obj) static void bench_hash_free (struct bench_obj *obj) { - gcry_md_hd_t hd = obj->priv; + gcry_md_hd_t hd = obj->hd; gcry_md_close (hd); } @@ -1499,7 +1656,7 @@ bench_hash_free (struct bench_obj *obj) static void bench_hash_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_md_hd_t hd = obj->priv; + gcry_md_hd_t hd = obj->hd; gcry_md_reset (hd); gcry_md_write (hd, buf, buflen); @@ -1524,6 +1681,7 @@ hash_bench_one (int algo, struct bench_hash_mode *pmode) { struct bench_hash_mode mode = *pmode; struct bench_obj obj = { 0 }; + double bench_ghz; double result; mode.algo = algo; @@ -1536,9 +1694,9 @@ hash_bench_one (int algo, struct bench_hash_mode *pmode) obj.ops = mode.ops; obj.priv = &mode; - result = do_slope_benchmark (&obj); + result = do_slope_benchmark (&obj, &bench_ghz); - bench_print_result (result); + bench_print_result (result, bench_ghz); } static void @@ -1645,7 +1803,7 @@ bench_mac_init (struct bench_obj *obj) break; } - obj->priv = hd; + obj->hd = hd; free (key); return 0; @@ -1654,7 +1812,7 @@ bench_mac_init (struct bench_obj *obj) static void bench_mac_free (struct bench_obj *obj) { - gcry_mac_hd_t hd = obj->priv; + gcry_mac_hd_t hd = obj->hd; gcry_mac_close (hd); } @@ -1662,7 +1820,7 @@ bench_mac_free (struct bench_obj *obj) static void bench_mac_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_mac_hd_t hd = obj->priv; + gcry_mac_hd_t hd = 
obj->hd; size_t bs; char b; @@ -1690,6 +1848,7 @@ mac_bench_one (int algo, struct bench_mac_mode *pmode) { struct bench_mac_mode mode = *pmode; struct bench_obj obj = { 0 }; + double bench_ghz; double result; mode.algo = algo; @@ -1702,9 +1861,9 @@ mac_bench_one (int algo, struct bench_mac_mode *pmode) obj.ops = mode.ops; obj.priv = &mode; - result = do_slope_benchmark (&obj); + result = do_slope_benchmark (&obj, &bench_ghz); - bench_print_result (result); + bench_print_result (result, bench_ghz); } static void @@ -1807,9 +1966,11 @@ kdf_bench_one (int algo, int subalgo) struct bench_obj obj = { 0 }; double nsecs_per_iteration; double cycles_per_iteration; + double bench_ghz; char algo_name[32]; char nsecpiter_buf[16]; char cpiter_buf[16]; + char mhz_buf[16]; mode.algo = algo; mode.subalgo = subalgo; @@ -1843,31 +2004,45 @@ kdf_bench_one (int algo, int subalgo) obj.ops = mode.ops; obj.priv = &mode; - nsecs_per_iteration = do_slope_benchmark (&obj); + nsecs_per_iteration = do_slope_benchmark (&obj, &bench_ghz); strcpy(cpiter_buf, csv_mode ? "" : "-"); + strcpy(mhz_buf, csv_mode ? "" : "-"); double_to_str (nsecpiter_buf, sizeof (nsecpiter_buf), nsecs_per_iteration); /* If user didn't provide CPU speed, we cannot show cycles/iter results. */ - if (cpu_ghz > 0.0) + if (bench_ghz > 0.0) { - cycles_per_iteration = nsecs_per_iteration * cpu_ghz; + cycles_per_iteration = nsecs_per_iteration * bench_ghz; double_to_str (cpiter_buf, sizeof (cpiter_buf), cycles_per_iteration); + double_to_str (mhz_buf, sizeof (mhz_buf), bench_ghz * 1000); } if (csv_mode) { - printf ("%s,%s,%s,,,,,,,,,%s,ns/iter,%s,c/iter\n", - current_section_name, - current_algo_name ? current_algo_name : "", - current_mode_name ? current_mode_name : "", - nsecpiter_buf, - cpiter_buf); + if (auto_ghz) + printf ("%s,%s,%s,,,,,,,,,%s,ns/iter,%s,c/iter,%s,Mhz\n", + current_section_name, + current_algo_name ? current_algo_name : "", + current_mode_name ? current_mode_name : "", + nsecpiter_buf, + cpiter_buf, + mhz_buf); + else + printf ("%s,%s,%s,,,,,,,,,%s,ns/iter,%s,c/iter\n", + current_section_name, + current_algo_name ? current_algo_name : "", + current_mode_name ? current_mode_name : "", + nsecpiter_buf, + cpiter_buf); } else { - printf ("%14s %13s\n", nsecpiter_buf, cpiter_buf); + if (auto_ghz) + printf ("%14s %13s %9s\n", nsecpiter_buf, cpiter_buf, mhz_buf); + else + printf ("%14s %13s\n", nsecpiter_buf, cpiter_buf); } } @@ -1882,7 +2057,10 @@ kdf_bench (char **argv, int argc) if (!csv_mode) { printf (" %-*s | ", 24, ""); - printf ("%14s %13s\n", "nanosecs/iter", "cycles/iter"); + if (auto_ghz) + printf ("%14s %13s %9s\n", "nanosecs/iter", "cycles/iter", "auto Mhz"); + else + printf ("%14s %13s\n", "nanosecs/iter", "cycles/iter"); } if (argv && argc) @@ -1923,7 +2101,8 @@ print_help (void) "", " options:", " --cpu-mhz Set CPU speed for calculating cycles", - " per bytes results.", + " per bytes results. 
Set as \"auto\"", + " for auto-detection of CPU speed.", " --disable-hwf Disable hardware acceleration feature(s)", " for benchmarking.", " --repetitions Use N repetitions (default " @@ -2039,8 +2218,15 @@ main (int argc, char **argv) argv++; if (argc) { - cpu_ghz = atof (*argv); - cpu_ghz /= 1000; /* Mhz => Ghz */ + if (!strcmp (*argv, "auto")) + { + auto_ghz = 1; + } + else + { + cpu_ghz = atof (*argv); + cpu_ghz /= 1000; /* Mhz => Ghz */ + } argc--; argv++; From smueller at chronox.de Mon Mar 12 08:45:59 2018 From: smueller at chronox.de (Stephan Mueller) Date: Mon, 12 Mar 2018 08:45:59 +0100 Subject: SP800-38F AES kw encryption broken? Message-ID: <4386046.jcxHWR5DvT@tauon.chronox.de> Hi, The function _gcry_cipher_aeswrap_encrypt seems to be broken as it does not produce the expected ciphertext. When I invoke the encryption operation with the following data key = f59782f1dceb0544a8da06b34969b9212b55ce6dcbdd0975a33f4b3f88b538da plain = 73d33060b5f9f2eb5785c0703ddfa704 I obtain the following: ciphertext = 888268c16299bc292154bd5ee49a27a521d3299e02eff900 However, when I decrypt the ciphertext immediately following the encryption operation, I get the following plain = a6a6a6a6a6a6a6a65785c0703ddfa704 This seems to indicate that the first semiblock of the plaintext does not seem to be used at all but rather is replaced with the default IV. The decryption function works as expected. Unfortunately the current git repo cannot be compiled ("cannot find mpi- internal.h", no rule to generate chacha20-sse2-amd64.S) so I cannot debug the code. Ciao Stephan From smueller at chronox.de Mon Mar 12 22:24:37 2018 From: smueller at chronox.de (Stephan Mueller) Date: Mon, 12 Mar 2018 22:24:37 +0100 Subject: [PATCH] AES-KW: fix in-place encryption In-Reply-To: <4386046.jcxHWR5DvT@tauon.chronox.de> References: <4386046.jcxHWR5DvT@tauon.chronox.de> Message-ID: <10870564.SVcAGkk7tg@tauon.chronox.de> In case AES-KW in-place encryption is performed, the plaintext must be moved to the correct destination location before the first semiblock of the destination buffer is modified. Without the patch, the first semiblock of the plaintext is overwritten with a6a6a6a6a6a6a6a6. * cipher/cipher-aeswrap.c: move memove call before KW IV setting Signed-off-by: Stephan Mueller --- cipher/cipher-aeswrap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cipher/cipher-aeswrap.c b/cipher/cipher-aeswrap.c index 698742df..a8d0e03e 100644 --- a/cipher/cipher-aeswrap.c +++ b/cipher/cipher-aeswrap.c @@ -70,6 +70,9 @@ _gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c, a = outbuf; /* We store A directly in OUTBUF. */ b = c->u_ctr.ctr; /* B is also used to concatenate stuff. */ + /* Copy the inbuf to the outbuf. */ + memmove (r+8, inbuf, inbuflen); + /* If an IV has been set we use that IV as the Alternative Initial Value; if it has not been set we use the standard value. */ if (c->marks.iv) @@ -77,9 +80,6 @@ _gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c, else memset (a, 0xa6, 8); - /* Copy the inbuf to the outbuf. */ - memmove (r+8, inbuf, inbuflen); - memset (t, 0, sizeof t); /* t := 0. 
*/ for (j = 0; j <= 5; j++) -- 2.14.3 From martin at martin.st Thu Mar 22 09:56:14 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 10:56:14 +0200 Subject: [PATCH 4/5] aarch64: camellia: Only use the lower 32 bit of an int parameter In-Reply-To: <1521708975-30902-1-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: <1521708975-30902-4-git-send-email-martin@martin.st> The keybits parameter is declared as int, and in those cases, the upper half of a register is undefined, not guaranteed to be zero. Signed-off-by: Martin Storsj? --- I didn't check other files and functions for the same issue, I just happened to notice this one while looking closer at the camellia source file. >From previous experience, clang can be pretty aggressive with passing in undefined data in the upper half of registers, where it isn't supposed to make any difference. --- cipher/camellia-aarch64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 440f69f..68d2a7d 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -33,7 +33,7 @@ #define CTX x0 #define RDST x1 #define RSRC x2 -#define RKEYBITS x3 +#define RKEYBITS w3 #define RTAB1 x4 #define RTAB2 x5 -- 2.7.4 From martin at martin.st Thu Mar 22 09:56:15 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 10:56:15 +0200 Subject: [PATCH 5/5] aarch64: Enable building the aarch64 cipher assembly for windows In-Reply-To: <1521708975-30902-1-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: <1521708975-30902-5-git-send-email-martin@martin.st> Don't require .type and .size in configure; we can make them optional via a preprocessor macro. This is mostly a mechanical change, wrapping the .type and .size directives in an ELF() macro, with two actual manual changes (when targeting windows): - Don't load global symbols via a GOT (in chacha20) - Don't use the x18 register (in camellia); back up and restore x19 in the prologue/epilogue and use that instead (on windows only). x18 is a platform specific register; on linux, it's free to be used by user code, while it's reserved for platform use on windows and darwin. Signed-off-by: Martin Storsj? --- This isn't strictly necessary for building libgcrypt for windows on aarch64; previously configure concludes that the assembly can't be built since the .type and .size directives don't work. This just allows using more of the existing assembly routines. This also probably has the effect that the same assembly gets enabled when targeting darwin (iOS), but building with assembly enabled doesn't work for darwin anyway (even prior to this change, since darwin requires an extra leading underscore on all symbols, while the mpi/aarch64 code gets automatically enabled). 
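To make the mechanics concrete before the diff: the whole patch rests on
one variadic preprocessor macro. The sketch below is illustrative only
(hypothetical file and symbol names, not code from the patch); it shows
what the ELF() wrapper introduced in asm-common-aarch64.h expands to on
each target.

/* elf-macro-sketch.h - illustrative only; mirrors the ELF() pattern.
 *
 * On ELF targets the macro passes its arguments through unchanged; on
 * COFF (windows) and Mach-O (darwin) it swallows them, so .type and
 * .size directives simply vanish instead of breaking the assembler. */
#ifdef __ELF__
# define ELF(...) __VA_ARGS__
#else
# define ELF(...) /*_*/
#endif

/* A preprocessed assembly file can then contain:
 *
 *     .globl _gcry_foo
 *     ELF(.type _gcry_foo,%function;)
 *     _gcry_foo:
 *         ret;
 *     ELF(.size _gcry_foo,.-_gcry_foo;)
 *
 * On ELF this expands to the usual directives; elsewhere the wrapped
 * lines disappear, which is why configure no longer needs to probe
 * for .type/.size support. */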
--- cipher/asm-common-aarch64.h | 32 ++++++++++++++++++++ cipher/camellia-aarch64.S | 34 ++++++++++++++++----- cipher/chacha20-aarch64.S | 12 ++++++-- cipher/cipher-gcm-armv8-aarch64-ce.S | 10 +++---- cipher/rijndael-aarch64.S | 10 +++---- cipher/rijndael-armv8-aarch64-ce.S | 58 ++++++++++++++++++------------------ cipher/sha1-armv8-aarch64-ce.S | 6 ++-- cipher/sha256-armv8-aarch64-ce.S | 6 ++-- cipher/twofish-aarch64.S | 10 +++---- configure.ac | 4 --- 10 files changed, 118 insertions(+), 64 deletions(-) create mode 100644 cipher/asm-common-aarch64.h diff --git a/cipher/asm-common-aarch64.h b/cipher/asm-common-aarch64.h new file mode 100644 index 0000000..814b7ad --- /dev/null +++ b/cipher/asm-common-aarch64.h @@ -0,0 +1,32 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsj? + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef GCRY_ASM_COMMON_AARCH64_H +#define GCRY_ASM_COMMON_AARCH64_H + +#include + +#ifdef __ELF__ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* GCRY_ASM_COMMON_AARCH64_H */ diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 68d2a7d..ec4ebef 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -19,11 +19,17 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS +#ifdef _WIN32 +# define WIN(...) __VA_ARGS__ +#else +# define WIN(...) /*_*/ +#endif + .text /* struct camellia_ctx: */ @@ -55,12 +61,21 @@ #define RT0 w15 #define RT1 w16 #define RT2 w17 +#ifdef _WIN32 +#define RT3 w19 +#else #define RT3 w18 +#endif #define xRT0 x15 #define xRT1 x16 #define xRT2 x17 +#ifdef _WIN32 +/* w18/x18 is reserved and can't be used on windows. */ +#define xRT3 x19 +#else #define xRT3 x18 +#endif #ifdef __AARCH64EL__ #define host_to_be(reg, rtmp) \ @@ -198,9 +213,10 @@ str_output_be(RDST, YL, YR, XL, XR, RT0, RT1); .globl _gcry_camellia_arm_encrypt_block -.type _gcry_camellia_arm_encrypt_block, at function; +ELF(.type _gcry_camellia_arm_encrypt_block, at function;) _gcry_camellia_arm_encrypt_block: + WIN(stp x19, x30, [sp, #-16]!) /* input: * x0: keytable * x1: dst @@ -227,6 +243,7 @@ _gcry_camellia_arm_encrypt_block: outunpack(24); + WIN(ldp x19, x30, [sp], #16) ret; .ltorg @@ -236,14 +253,16 @@ _gcry_camellia_arm_encrypt_block: outunpack(32); + WIN(ldp x19, x30, [sp], #16) ret; .ltorg -.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block; +ELF(.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;) .globl _gcry_camellia_arm_decrypt_block -.type _gcry_camellia_arm_decrypt_block, at function; +ELF(.type _gcry_camellia_arm_decrypt_block, at function;) _gcry_camellia_arm_decrypt_block: + WIN(stp x19, x30, [sp, #-16]!) 
/* input: * x0: keytable * x1: dst @@ -271,6 +290,7 @@ _gcry_camellia_arm_decrypt_block: outunpack(0); + WIN(ldp x19, x30, [sp], #16) ret; .ltorg @@ -281,11 +301,11 @@ _gcry_camellia_arm_decrypt_block: b .Ldec_128; .ltorg -.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block; +ELF(.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;) /* Encryption/Decryption tables */ .globl _gcry_camellia_arm_tables -.type _gcry_camellia_arm_tables, at object; +ELF(.type _gcry_camellia_arm_tables, at object;) .balign 32 _gcry_camellia_arm_tables: .Lcamellia_sp1110: @@ -551,7 +571,7 @@ _gcry_camellia_arm_tables: .long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4 .long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7 .long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e -.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables; +ELF(.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 5990a08..3844d4e 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -27,7 +27,7 @@ * Public domain. */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -38,9 +38,15 @@ .text +#ifdef _WIN32 +#define GET_DATA_POINTER(reg, name) \ + adrp reg, name ; \ + add reg, reg, #:lo12:name ; +#else #define GET_DATA_POINTER(reg, name) \ adrp reg, :got:name ; \ ldr reg, [reg, #:got_lo12:name] ; +#endif /* register macros */ #define INPUT x0 @@ -148,7 +154,7 @@ chacha20_data: .align 3 .globl _gcry_chacha20_aarch64_blocks4 -.type _gcry_chacha20_aarch64_blocks4,%function; +ELF(.type _gcry_chacha20_aarch64_blocks4,%function;) _gcry_chacha20_aarch64_blocks4: /* input: @@ -303,6 +309,6 @@ _gcry_chacha20_aarch64_blocks4: eor x0, x0, x0 ret -.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4; +ELF(.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4;) #endif diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S index 0cfaf1c..b6c4f59 100644 --- a/cipher/cipher-gcm-armv8-aarch64-ce.S +++ b/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -174,7 +174,7 @@ gcry_gcm_reduction_constant: */ .align 3 .globl _gcry_ghash_armv8_ce_pmull -.type _gcry_ghash_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_armv8_ce_pmull,%function;) _gcry_ghash_armv8_ce_pmull: /* input: * x0: gcm_key @@ -360,7 +360,7 @@ _gcry_ghash_armv8_ce_pmull: .Ldo_nothing: mov x0, #0 ret -.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull; +ELF(.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull;) /* @@ -368,7 +368,7 @@ _gcry_ghash_armv8_ce_pmull: */ .align 3 .globl _gcry_ghash_setup_armv8_ce_pmull -.type _gcry_ghash_setup_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_setup_armv8_ce_pmull,%function;) _gcry_ghash_setup_armv8_ce_pmull: /* input: * x0: gcm_key @@ -408,6 +408,6 @@ _gcry_ghash_setup_armv8_ce_pmull: st1 {rh5.16b-rh6.16b}, [x1] ret -.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull; +ELF(.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull;) #endif diff --git a/cipher/rijndael-aarch64.S b/cipher/rijndael-aarch64.S index e533bbe..aad7487 100644 --- a/cipher/rijndael-aarch64.S +++ b/cipher/rijndael-aarch64.S @@ -18,7 +18,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -206,7 +206,7 @@ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_encrypt_block -.type _gcry_aes_arm_encrypt_block,%function; +ELF(.type _gcry_aes_arm_encrypt_block,%function;) _gcry_aes_arm_encrypt_block: /* input: @@ -285,7 +285,7 @@ _gcry_aes_arm_encrypt_block: lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD); b .Lenc_done; -.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block; +ELF(.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;) #define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \ @@ -429,7 +429,7 @@ _gcry_aes_arm_encrypt_block: addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_decrypt_block -.type _gcry_aes_arm_decrypt_block,%function; +ELF(.type _gcry_aes_arm_decrypt_block,%function;) _gcry_aes_arm_decrypt_block: /* input: @@ -504,7 +504,7 @@ _gcry_aes_arm_decrypt_block: decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); b .Ldec_tail; -.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block; +ELF(.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__ */ diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S index 40097a7..5859557 100644 --- a/cipher/rijndael-armv8-aarch64-ce.S +++ b/cipher/rijndael-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -239,7 +239,7 @@ */ .align 3 .globl _gcry_aes_enc_armv8_ce -.type _gcry_aes_enc_armv8_ce,%function; +ELF(.type _gcry_aes_enc_armv8_ce,%function;) _gcry_aes_enc_armv8_ce: /* input: * x0: keysched @@ -291,7 +291,7 @@ _gcry_aes_enc_armv8_ce: CLEAR_REG(vk13) CLEAR_REG(vk14) b .Lenc1_tail -.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce; +ELF(.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce;) /* @@ -301,7 +301,7 @@ _gcry_aes_enc_armv8_ce: */ .align 3 .globl _gcry_aes_dec_armv8_ce -.type _gcry_aes_dec_armv8_ce,%function; +ELF(.type _gcry_aes_dec_armv8_ce,%function;) _gcry_aes_dec_armv8_ce: /* input: * x0: keysched @@ -353,7 +353,7 @@ _gcry_aes_dec_armv8_ce: CLEAR_REG(vk13) CLEAR_REG(vk14) b .Ldec1_tail -.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce; +ELF(.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce;) /* @@ -366,7 +366,7 @@ _gcry_aes_dec_armv8_ce: .align 3 .globl _gcry_aes_cbc_enc_armv8_ce -.type _gcry_aes_cbc_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_enc_armv8_ce,%function;) _gcry_aes_cbc_enc_armv8_ce: /* input: * x0: keysched @@ -419,7 +419,7 @@ _gcry_aes_cbc_enc_armv8_ce: .Lcbc_enc_skip: ret -.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce; +ELF(.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce;) /* * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched, @@ -430,7 +430,7 @@ _gcry_aes_cbc_enc_armv8_ce: .align 3 .globl _gcry_aes_cbc_dec_armv8_ce -.type _gcry_aes_cbc_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_dec_armv8_ce,%function;) _gcry_aes_cbc_dec_armv8_ce: /* input: * x0: keysched @@ -515,7 +515,7 @@ _gcry_aes_cbc_dec_armv8_ce: .Lcbc_dec_skip: ret -.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce; +ELF(.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce;) /* @@ -527,7 +527,7 @@ _gcry_aes_cbc_dec_armv8_ce: .align 3 .globl _gcry_aes_ctr_enc_armv8_ce -.type _gcry_aes_ctr_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ctr_enc_armv8_ce,%function;) _gcry_aes_ctr_enc_armv8_ce: /* input: * r0: keysched @@ -669,7 +669,7 @@ _gcry_aes_ctr_enc_armv8_ce: .Lctr_enc_skip: ret -.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce; +ELF(.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce;) /* @@ -681,7 +681,7 @@ _gcry_aes_ctr_enc_armv8_ce: .align 3 .globl _gcry_aes_cfb_enc_armv8_ce -.type _gcry_aes_cfb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_enc_armv8_ce,%function;) _gcry_aes_cfb_enc_armv8_ce: /* input: * r0: keysched @@ -732,7 +732,7 @@ _gcry_aes_cfb_enc_armv8_ce: .Lcfb_enc_skip: ret -.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce; +ELF(.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce;) /* @@ -744,7 +744,7 @@ _gcry_aes_cfb_enc_armv8_ce: .align 3 .globl _gcry_aes_cfb_dec_armv8_ce -.type _gcry_aes_cfb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_dec_armv8_ce,%function;) _gcry_aes_cfb_dec_armv8_ce: /* input: * r0: keysched @@ -829,7 +829,7 @@ _gcry_aes_cfb_dec_armv8_ce: .Lcfb_dec_skip: ret -.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce; +ELF(.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce;) /* @@ -846,7 +846,7 @@ _gcry_aes_cfb_dec_armv8_ce: .align 3 .globl _gcry_aes_ocb_enc_armv8_ce -.type _gcry_aes_ocb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_enc_armv8_ce,%function;) _gcry_aes_ocb_enc_armv8_ce: /* input: * x0: keysched @@ -979,7 +979,7 @@ _gcry_aes_ocb_enc_armv8_ce: CLEAR_REG(v16) ret -.size 
_gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce; +ELF(.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce;) /* @@ -996,7 +996,7 @@ _gcry_aes_ocb_enc_armv8_ce: .align 3 .globl _gcry_aes_ocb_dec_armv8_ce -.type _gcry_aes_ocb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_dec_armv8_ce,%function;) _gcry_aes_ocb_dec_armv8_ce: /* input: * x0: keysched @@ -1129,7 +1129,7 @@ _gcry_aes_ocb_dec_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce; +ELF(.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce;) /* @@ -1145,7 +1145,7 @@ _gcry_aes_ocb_dec_armv8_ce: .align 3 .globl _gcry_aes_ocb_auth_armv8_ce -.type _gcry_aes_ocb_auth_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_auth_armv8_ce,%function;) _gcry_aes_ocb_auth_armv8_ce: /* input: * x0: keysched @@ -1273,7 +1273,7 @@ _gcry_aes_ocb_auth_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce; +ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;) /* @@ -1285,7 +1285,7 @@ _gcry_aes_ocb_auth_armv8_ce: .align 3 .globl _gcry_aes_xts_enc_armv8_ce -.type _gcry_aes_xts_enc_armv8_ce,%function; +ELF(.type _gcry_aes_xts_enc_armv8_ce,%function;) _gcry_aes_xts_enc_armv8_ce: /* input: * r0: keysched @@ -1410,7 +1410,7 @@ _gcry_aes_xts_enc_armv8_ce: .Lxts_enc_skip: ret -.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce; +ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;) /* @@ -1422,7 +1422,7 @@ _gcry_aes_xts_enc_armv8_ce: .align 3 .globl _gcry_aes_xts_dec_armv8_ce -.type _gcry_aes_xts_dec_armv8_ce,%function; +ELF(.type _gcry_aes_xts_dec_armv8_ce,%function;) _gcry_aes_xts_dec_armv8_ce: /* input: * r0: keysched @@ -1547,7 +1547,7 @@ _gcry_aes_xts_dec_armv8_ce: .Lxts_dec_skip: ret -.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce; +ELF(.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;) /* @@ -1555,7 +1555,7 @@ _gcry_aes_xts_dec_armv8_ce: */ .align 3 .globl _gcry_aes_sbox4_armv8_ce -.type _gcry_aes_sbox4_armv8_ce,%function; +ELF(.type _gcry_aes_sbox4_armv8_ce,%function;) _gcry_aes_sbox4_armv8_ce: /* See "Gouv?a, C. P. L. & L?pez, J. Implementing GCM on ARMv8. Topics in * Cryptology ? CT-RSA 2015" for details. @@ -1568,7 +1568,7 @@ _gcry_aes_sbox4_armv8_ce: mov w0, v0.S[0] CLEAR_REG(v0) ret -.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce; +ELF(.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;) /* @@ -1576,13 +1576,13 @@ _gcry_aes_sbox4_armv8_ce: */ .align 3 .globl _gcry_aes_invmixcol_armv8_ce -.type _gcry_aes_invmixcol_armv8_ce,%function; +ELF(.type _gcry_aes_invmixcol_armv8_ce,%function;) _gcry_aes_invmixcol_armv8_ce: ld1 {v0.16b}, [x1] aesimc v0.16b, v0.16b st1 {v0.16b}, [x0] CLEAR_REG(v0) ret -.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce; +ELF(.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;) #endif diff --git a/cipher/sha1-armv8-aarch64-ce.S b/cipher/sha1-armv8-aarch64-ce.S index ec1810d..aeb67a1 100644 --- a/cipher/sha1-armv8-aarch64-ce.S +++ b/cipher/sha1-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -103,7 +103,7 @@ gcry_sha1_aarch64_ce_K_VEC: */ .align 3 .globl _gcry_sha1_transform_armv8_ce -.type _gcry_sha1_transform_armv8_ce,%function; +ELF(.type _gcry_sha1_transform_armv8_ce,%function;) _gcry_sha1_transform_armv8_ce: /* input: * x0: ctx, CTX @@ -199,6 +199,6 @@ _gcry_sha1_transform_armv8_ce: .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce; +ELF(.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce;) #endif diff --git a/cipher/sha256-armv8-aarch64-ce.S b/cipher/sha256-armv8-aarch64-ce.S index a4575da..6b3ad32 100644 --- a/cipher/sha256-armv8-aarch64-ce.S +++ b/cipher/sha256-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -113,7 +113,7 @@ gcry_sha256_aarch64_ce_K: */ .align 3 .globl _gcry_sha256_transform_armv8_ce -.type _gcry_sha256_transform_armv8_ce,%function; +ELF(.type _gcry_sha256_transform_armv8_ce,%function;) _gcry_sha256_transform_armv8_ce: /* input: * r0: ctx, CTX @@ -213,6 +213,6 @@ _gcry_sha256_transform_armv8_ce: .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce; +ELF(.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce;) #endif diff --git a/cipher/twofish-aarch64.S b/cipher/twofish-aarch64.S index 99c4675..adee412 100644 --- a/cipher/twofish-aarch64.S +++ b/cipher/twofish-aarch64.S @@ -18,7 +18,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -217,7 +217,7 @@ ror1(RD); .globl _gcry_twofish_arm_encrypt_block -.type _gcry_twofish_arm_encrypt_block,%function; +ELF(.type _gcry_twofish_arm_encrypt_block,%function;) _gcry_twofish_arm_encrypt_block: /* input: @@ -263,10 +263,10 @@ _gcry_twofish_arm_encrypt_block: ret; .ltorg -.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block; +ELF(.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;) .globl _gcry_twofish_arm_decrypt_block -.type _gcry_twofish_arm_decrypt_block,%function; +ELF(.type _gcry_twofish_arm_decrypt_block,%function;) _gcry_twofish_arm_decrypt_block: /* input: @@ -311,7 +311,7 @@ _gcry_twofish_arm_decrypt_block: str_output_le(RDST, RA, RB, RC, RD, RT0, RT1); ret; -.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block; +ELF(.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/configure.ac b/configure.ac index b5d7211..330485f 100644 --- a/configure.ac +++ b/configure.ac @@ -1119,10 +1119,6 @@ AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly i "eor x0, x0, x30, ror #12;\n\t" "add x0, x0, x30, asr #12;\n\t" "eor v0.16b, v0.16b, v31.16b;\n\t" - - /* Test if '.type' and '.size' are supported. 
*/ - ".size asmfunc,.-asmfunc;\n\t" - ".type asmfunc, at function;\n\t" );]])], [gcry_cv_gcc_aarch64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then -- 2.7.4 From martin at martin.st Thu Mar 22 09:56:11 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 10:56:11 +0200 Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 Message-ID: <1521708975-30902-1-git-send-email-martin@martin.st> This fixes building this file for windows on aarch64. Signed-off-by: Martin Storsj? --- random/rndw32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/random/rndw32.c b/random/rndw32.c index 7e9ac50..08a8867 100644 --- a/random/rndw32.c +++ b/random/rndw32.c @@ -986,7 +986,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, On AMD64, TSC is always available and intrinsic is provided for accessing it. */ -#ifdef __WIN64__ +#ifdef __x86_64__ { unsigned __int64 aint64; @@ -1024,7 +1024,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, (*add) (&aword, sizeof (aword), origin ); } } -#endif /*__WIN64__*/ +#endif /*__x86_64__*/ } -- 2.7.4 From martin at martin.st Thu Mar 22 09:56:13 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 10:56:13 +0200 Subject: [PATCH 3/5] aarch64: Fix assembling chacha20-aarch64.S with clang/llvm In-Reply-To: <1521708975-30902-1-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: <1521708975-30902-3-git-send-email-martin@martin.st> When referring to a specific lane, one doesn't need to specify the total number of lanes of the register. With GNU binutils, both forms are accepted, while clang/llvm rejects the form with the unnecessary number of lanes. Signed-off-by: Martin Storsj? 
--- cipher/chacha20-aarch64.S | 60 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 739ddde..5990a08 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -170,27 +170,27 @@ _gcry_chacha20_aarch64_blocks4: mov ROUND, #20; ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS]; - dup X12.4s, X15.4s[0]; - dup X13.4s, X15.4s[1]; + dup X12.4s, X15.s[0]; + dup X13.4s, X15.s[1]; ldr CTR, [INPUT_CTR]; add X12.4s, X12.4s, VCTR.4s; - dup X0.4s, VTMP1.4s[0]; - dup X1.4s, VTMP1.4s[1]; - dup X2.4s, VTMP1.4s[2]; - dup X3.4s, VTMP1.4s[3]; - dup X14.4s, X15.4s[2]; + dup X0.4s, VTMP1.s[0]; + dup X1.4s, VTMP1.s[1]; + dup X2.4s, VTMP1.s[2]; + dup X3.4s, VTMP1.s[3]; + dup X14.4s, X15.s[2]; cmhi VTMP0.4s, VCTR.4s, X12.4s; - dup X15.4s, X15.4s[3]; + dup X15.4s, X15.s[3]; add CTR, CTR, #4; /* Update counter */ - dup X4.4s, VTMP2.4s[0]; - dup X5.4s, VTMP2.4s[1]; - dup X6.4s, VTMP2.4s[2]; - dup X7.4s, VTMP2.4s[3]; + dup X4.4s, VTMP2.s[0]; + dup X5.4s, VTMP2.s[1]; + dup X6.4s, VTMP2.s[2]; + dup X7.4s, VTMP2.s[3]; sub X13.4s, X13.4s, VTMP0.4s; - dup X8.4s, VTMP3.4s[0]; - dup X9.4s, VTMP3.4s[1]; - dup X10.4s, VTMP3.4s[2]; - dup X11.4s, VTMP3.4s[3]; + dup X8.4s, VTMP3.s[0]; + dup X9.4s, VTMP3.s[1]; + dup X10.4s, VTMP3.s[2]; + dup X11.4s, VTMP3.s[3]; mov X12_TMP.16b, X12.16b; mov X13_TMP.16b, X13.16b; str CTR, [INPUT_CTR]; @@ -208,19 +208,19 @@ _gcry_chacha20_aarch64_blocks4: PLUS(X12, X12_TMP); /* INPUT + 12 * 4 + counter */ PLUS(X13, X13_TMP); /* INPUT + 13 * 4 + counter */ - dup VTMP2.4s, VTMP0.4s[0]; /* INPUT + 0 * 4 */ - dup VTMP3.4s, VTMP0.4s[1]; /* INPUT + 1 * 4 */ - dup X12_TMP.4s, VTMP0.4s[2]; /* INPUT + 2 * 4 */ - dup X13_TMP.4s, VTMP0.4s[3]; /* INPUT + 3 * 4 */ + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */ PLUS(X0, VTMP2); PLUS(X1, VTMP3); PLUS(X2, X12_TMP); PLUS(X3, X13_TMP); - dup VTMP2.4s, VTMP1.4s[0]; /* INPUT + 4 * 4 */ - dup VTMP3.4s, VTMP1.4s[1]; /* INPUT + 5 * 4 */ - dup X12_TMP.4s, VTMP1.4s[2]; /* INPUT + 6 * 4 */ - dup X13_TMP.4s, VTMP1.4s[3]; /* INPUT + 7 * 4 */ + dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */ + dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */ + dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */ + dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */ ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS]; mov INPUT_POS, INPUT; PLUS(X4, VTMP2); @@ -228,12 +228,12 @@ _gcry_chacha20_aarch64_blocks4: PLUS(X6, X12_TMP); PLUS(X7, X13_TMP); - dup VTMP2.4s, VTMP0.4s[0]; /* INPUT + 8 * 4 */ - dup VTMP3.4s, VTMP0.4s[1]; /* INPUT + 9 * 4 */ - dup X12_TMP.4s, VTMP0.4s[2]; /* INPUT + 10 * 4 */ - dup X13_TMP.4s, VTMP0.4s[3]; /* INPUT + 11 * 4 */ - dup VTMP0.4s, VTMP1.4s[2]; /* INPUT + 14 * 4 */ - dup VTMP1.4s, VTMP1.4s[3]; /* INPUT + 15 * 4 */ + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */ + dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */ + dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */ PLUS(X8, VTMP2); PLUS(X9, VTMP3); PLUS(X10, X12_TMP); -- 2.7.4 From martin at martin.st Thu Mar 22 09:56:12 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 10:56:12 +0200 Subject: [PATCH 2/5] aarch64: mpi: Fix building the mpi aarch64 assembly for windows In-Reply-To: 
<1521708975-30902-1-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: <1521708975-30902-2-git-send-email-martin@martin.st> The mpi aarch64 assembly is enabled as soon as the compiler supports inline assembly, without checking for .type and .size, as is done for the rest of the assembly in cipher/*.S. (The .type and .size directives are only supported on ELF.) Signed-off-by: Martin Storsj? --- mpi/aarch64/mpih-add1.S | 5 +++-- mpi/aarch64/mpih-mul1.S | 5 +++-- mpi/aarch64/mpih-mul2.S | 5 +++-- mpi/aarch64/mpih-mul3.S | 5 +++-- mpi/aarch64/mpih-sub1.S | 5 +++-- mpi/asm-common-aarch64.h | 30 ++++++++++++++++++++++++++++++ 6 files changed, 45 insertions(+), 10 deletions(-) create mode 100644 mpi/asm-common-aarch64.h diff --git a/mpi/aarch64/mpih-add1.S b/mpi/aarch64/mpih-add1.S index fa8cd01..4ead1c2 100644 --- a/mpi/aarch64/mpih-add1.S +++ b/mpi/aarch64/mpih-add1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_add_n -.type _gcry_mpih_add_n,%function +ELF(.type _gcry_mpih_add_n,%function) _gcry_mpih_add_n: and x5, x3, #3; adds xzr, xzr, xzr; /* clear carry flag */ @@ -68,4 +69,4 @@ _gcry_mpih_add_n: .Lend: adc x0, xzr, xzr; ret; -.size _gcry_mpih_add_n,.-_gcry_mpih_add_n; +ELF(.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;) diff --git a/mpi/aarch64/mpih-mul1.S b/mpi/aarch64/mpih-mul1.S index 65e98fe..8a86269 100644 --- a/mpi/aarch64/mpih-mul1.S +++ b/mpi/aarch64/mpih-mul1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_mul_1 -.type _gcry_mpih_mul_1,%function +ELF(.type _gcry_mpih_mul_1,%function) _gcry_mpih_mul_1: and x5, x2, #3; mov x4, xzr; @@ -93,4 +94,4 @@ _gcry_mpih_mul_1: .Lend: mov x0, x4; ret; -.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1; +ELF(.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;) diff --git a/mpi/aarch64/mpih-mul2.S b/mpi/aarch64/mpih-mul2.S index bd3b2c9..c7c08e5 100644 --- a/mpi/aarch64/mpih-mul2.S +++ b/mpi/aarch64/mpih-mul2.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_addmul_1 -.type _gcry_mpih_addmul_1,%function +ELF(.type _gcry_mpih_addmul_1,%function) _gcry_mpih_addmul_1: and x5, x2, #3; mov x6, xzr; @@ -105,4 +106,4 @@ _gcry_mpih_addmul_1: .Lend: mov x0, x6; ret; -.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1; +ELF(.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;) diff --git a/mpi/aarch64/mpih-mul3.S b/mpi/aarch64/mpih-mul3.S index a58bc53..ccc961e 100644 --- a/mpi/aarch64/mpih-mul3.S +++ b/mpi/aarch64/mpih-mul3.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_submul_1 -.type _gcry_mpih_submul_1,%function +ELF(.type _gcry_mpih_submul_1,%function) _gcry_mpih_submul_1: and x5, x2, #3; mov x7, xzr; @@ -118,4 +119,4 @@ _gcry_mpih_submul_1: .Loop_end: cinc x0, x7, cc; ret; -.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1; +ELF(.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;) diff --git a/mpi/aarch64/mpih-sub1.S b/mpi/aarch64/mpih-sub1.S index cbf2f08..4a66373 100644 --- a/mpi/aarch64/mpih-sub1.S +++ b/mpi/aarch64/mpih-sub1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" 
/******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_sub_n -.type _gcry_mpih_sub_n,%function +ELF(.type _gcry_mpih_sub_n,%function) _gcry_mpih_sub_n: and x5, x3, #3; subs xzr, xzr, xzr; /* prepare carry flag for sub */ @@ -68,4 +69,4 @@ _gcry_mpih_sub_n: .Lend: cset x0, cc; ret; -.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n; +ELF(.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;) diff --git a/mpi/asm-common-aarch64.h b/mpi/asm-common-aarch64.h new file mode 100644 index 0000000..1f646cb --- /dev/null +++ b/mpi/asm-common-aarch64.h @@ -0,0 +1,30 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsj? + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef GCRY_ASM_COMMON_AARCH64_H +#define GCRY_ASM_COMMON_AARCH64_H + +#ifdef __ELF__ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* GCRY_ASM_COMMON_AARCH64_H */ -- 2.7.4 From cvs at cvs.gnupg.org Thu Mar 22 15:35:41 2018 From: cvs at cvs.gnupg.org (by Werner Koch) Date: Thu, 22 Mar 2018 15:35:41 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-46-g92fd86e Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 92fd86e9956ef3fea51d72495fd0da09522e57a1 (commit) from 0b3ec359e2279c3b46b171372b1b7733bba20cd7 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 92fd86e9956ef3fea51d72495fd0da09522e57a1 Author: Werner Koch Date: Thu Mar 22 15:28:04 2018 +0100 doc: Clarify the value range of the use-rsa-e parameter. -- Signed-off-by: Werner Koch diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi index bba07a4..967745f 100644 --- a/doc/gcrypt.texi +++ b/doc/gcrypt.texi @@ -2905,7 +2905,9 @@ Use the given value. @noindent If this parameter is not used, Libgcrypt uses for historic reasons -65537. +65537. Note that the value must fit into a 32 bit unsigned variable +and that the usual C prefixes are considered (e.g. 017 gives 15). + @item qbits @var{n} This is only meanigful for DSA keys. If it is given the DSA key is diff --git a/src/sexp.c b/src/sexp.c index 0462d92..9d89268 100644 --- a/src/sexp.c +++ b/src/sexp.c @@ -401,7 +401,7 @@ _gcry_sexp_vlist( const gcry_sexp_t a, ... 
) /**************** * Append n to the list a - * Returns: a new ist (which maybe a) + * Returns: a new list (which maybe a) */ gcry_sexp_t _gcry_sexp_append( const gcry_sexp_t a, const gcry_sexp_t n ) ----------------------------------------------------------------------- Summary of changes: doc/gcrypt.texi | 4 +++- src/sexp.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From jussi.kivilinna at iki.fi Thu Mar 22 20:10:40 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 22 Mar 2018 21:10:40 +0200 Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: <1521708975-30902-1-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: Hello, On 22.03.2018 10:56, Martin Storsj? wrote: > This fixes building this file for windows on aarch64. > > Signed-off-by: Martin Storsj? Thanks for the patches. There is few generic things that needs to be done before these can be applied: - Check signing off DCO part in doc/HACKING - Add ChangeLog entries to beginning of commit log, see existing commit messages for example. I'm interested in testing these patches myself. Can you give some pointers for how to cross-compile libgcrypt for windows/aarch64? -Jussi > --- > random/rndw32.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/random/rndw32.c b/random/rndw32.c > index 7e9ac50..08a8867 100644 > --- a/random/rndw32.c > +++ b/random/rndw32.c > @@ -986,7 +986,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, > > On AMD64, TSC is always available and intrinsic is provided for accessing > it. */ > -#ifdef __WIN64__ > +#ifdef __x86_64__ > { > unsigned __int64 aint64; > > @@ -1024,7 +1024,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, > (*add) (&aword, sizeof (aword), origin ); > } > } > -#endif /*__WIN64__*/ > +#endif /*__x86_64__*/ > > > } > From jussi.kivilinna at iki.fi Thu Mar 22 20:18:29 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 22 Mar 2018 21:18:29 +0200 Subject: [PATCH 5/5] aarch64: Enable building the aarch64 cipher assembly for windows In-Reply-To: <1521708975-30902-5-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> <1521708975-30902-5-git-send-email-martin@martin.st> Message-ID: <89faffe8-4964-e29f-8a69-0696b6ce93cc@iki.fi> Hello, On 22.03.2018 10:56, Martin Storsj? wrote: > Don't require .type and .size in configure; we can make > them optional via a preprocessor macro. > > This is mostly a mechanical change, wrapping the .type and .size > directives in an ELF() macro, with two actual manual changes > (when targeting windows): > - Don't load global symbols via a GOT (in chacha20) > - Don't use the x18 register (in camellia); back up and restore x19 > in the prologue/epilogue and use that instead (on windows only). > > x18 is a platform specific register; on linux, it's free to be used > by user code, while it's reserved for platform use on windows and > darwin. > ...snip... > > +#ifdef _WIN32 > +# define WIN(...) __VA_ARGS__ > +#else > +# define WIN(...) 
/*_*/ > +#endif > + > .text > > /* struct camellia_ctx: */ > @@ -55,12 +61,21 @@ > #define RT0 w15 > #define RT1 w16 > #define RT2 w17 > +#ifdef _WIN32 > +#define RT3 w19 > +#else > #define RT3 w18 > +#endif > > #define xRT0 x15 > #define xRT1 x16 > #define xRT2 x17 > +#ifdef _WIN32 > +/* w18/x18 is reserved and can't be used on windows. */ > +#define xRT3 x19 > +#else > #define xRT3 x18 > +#endif > Now that I know x18 is reserved for a special purpose, I'd actually prefer that x18 be switched to x19 on linux too. Then there would be no need for the _WIN32 ifdefs and the WIN() macro. -Jussi From jussi.kivilinna at iki.fi Thu Mar 22 20:19:48 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 22 Mar 2018 21:19:48 +0200 Subject: [PATCH 4/5] aarch64: camellia: Only use the lower 32 bit of an int parameter In-Reply-To: <1521708975-30902-4-git-send-email-martin@martin.st> References: <1521708975-30902-1-git-send-email-martin@martin.st> <1521708975-30902-4-git-send-email-martin@martin.st> Message-ID: Hello, On 22.03.2018 10:56, Martin Storsjö wrote: > The keybits parameter is declared as int, and in those cases, the > upper half of a register is undefined, not guaranteed to be zero. > > Signed-off-by: Martin Storsjö > --- > I didn't check other files and functions for the same issue, I > just happened to notice this one while looking closer at the > camellia source file. > > From previous experience, clang can be pretty aggressive with > passing in undefined data in the upper half of registers, where > it isn't supposed to make any difference. I'll review the other aarch64 implementations for this issue. -Jussi From cvs at cvs.gnupg.org Thu Mar 22 20:50:40 2018 From: cvs at cvs.gnupg.org (by Jussi Kivilinna) Date: Thu, 22 Mar 2018 20:50:40 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-49-g617f5e7 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 617f5e746f8295cc36d1002c8c53edc95d04d0f6 (commit) via 3841b23c0ccb24d555b7570083bba958e3126d26 (commit) via a1127dbbada4302abf09eec90fbaceca87bfcdf0 (commit) from 92fd86e9956ef3fea51d72495fd0da09522e57a1 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 617f5e746f8295cc36d1002c8c53edc95d04d0f6 Author: Jussi Kivilinna Date: Thu Mar 22 21:42:23 2018 +0200 bench-slope: add CPU frequency auto-detection * tests/bench-slope.c (bench_obj): Add 'hd'. (bench_encrypt_init, bench_encrypt_free, bench_encrypt_do_bench) (bench_decrypt_do_bench, bench_xts_encrypt_init) (bench_xts_encrypt_do_bench, bench_xts_decrypt_do_bench) (bench_ccm_encrypt_init, bench_ccm_encrypt_do_bench) (bench_ccm_decrypt_do_bench, bench_aead_encrypt_init) (bench_aead_encrypt_do_bench, bench_aead_decrypt_do_bench) (bench_hash_init, bench_hash_free, bench_hash_do_bench) (bench_mac_init, bench_mac_free, bench_mac_do_bench): Use 'obj->hd' for storing pointer to crypto context. (auto_ghz): New. (do_slope_benchmark): Rename to... (slope_benchmark): ...this. (auto_ghz_init, auto_ghz_free, auto_ghz_bench, auto_ghz_detect_ops) (get_auto_ghz, do_slope_benchmark): New. (double_to_str): Round number larger than 1000 to integer.
(bench_print_result_csv, bench_print_result_std) (bench_print_result, bench_print_header, cipher_bench_one) (hash_bench_one, mac_bench_one, kdf_bench_one, kdf_bench): Add auto-detected frequency printing. (print_help): Help for CPU speed auto-detection mode. (main): Add parsing for "--cpu-mhz auto". -- Signed-off-by: Jussi Kivilinna diff --git a/tests/bench-slope.c b/tests/bench-slope.c index e34104f..5c64f22 100644 --- a/tests/bench-slope.c +++ b/tests/bench-slope.c @@ -50,6 +50,9 @@ static int num_measurement_repetitions; results. */ static double cpu_ghz = -1; +/* Attempt to autodetect CPU Ghz. */ +static int auto_ghz; + /* Whether we are running as part of the regression test suite. */ static int in_regression_test; @@ -220,6 +223,7 @@ struct bench_obj unsigned int step_size; void *priv; + void *hd; }; typedef int (*const bench_initialize_t) (struct bench_obj * obj); @@ -383,7 +387,7 @@ adjust_loop_iterations_to_timer_accuracy (struct bench_obj *obj, void *buffer, /* Benchmark and return linear regression slope in nanoseconds per byte. */ double -do_slope_benchmark (struct bench_obj *obj) +slope_benchmark (struct bench_obj *obj) { unsigned int num_measurements; double *measurements = NULL; @@ -464,6 +468,122 @@ err_free: return -1; } +/********************************************* CPU frequency auto-detection. */ + +static int +auto_ghz_init (struct bench_obj *obj) +{ + obj->min_bufsize = 16; + obj->max_bufsize = 64 + obj->min_bufsize; + obj->step_size = 8; + obj->num_measure_repetitions = 16; + + return 0; +} + +static void +auto_ghz_free (struct bench_obj *obj) +{ + (void)obj; +} + +static void +auto_ghz_bench (struct bench_obj *obj, void *buf, size_t buflen) +{ + (void)obj; + (void)buf; + + buflen *= 1024; + + /* Turbo frequency detection benchmark. Without CPU turbo-boost, this + * function will give cycles/iteration result 1024.0 on high-end CPUs. + * With turbo, result will be less and can be used detect turbo-clock. */ + + do + { +#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY + /* Use memory barrier to prevent compiler from optimizing this loop + * away. */ + + asm volatile ("":::"memory"); +#else + /* TODO: Needs alternative way. */ +#endif + } + while (--buflen); +} + +static struct bench_ops auto_ghz_detect_ops = { + &auto_ghz_init, + &auto_ghz_free, + &auto_ghz_bench +}; + + +double +get_auto_ghz (void) +{ + struct bench_obj obj = { 0 }; + double nsecs_per_iteration; + double cycles_per_iteration; + + obj.ops = &auto_ghz_detect_ops; + + nsecs_per_iteration = slope_benchmark (&obj); + + cycles_per_iteration = nsecs_per_iteration * cpu_ghz; + + /* Adjust CPU Ghz so that cycles per iteration would give '1024.0'. */ + + return cpu_ghz * 1024 / cycles_per_iteration; +} + + +double +do_slope_benchmark (struct bench_obj *obj, double *bench_ghz) +{ + double ret; + + if (!auto_ghz) + { + /* Perform measurement without autodetection of CPU frequency. */ + + ret = slope_benchmark (obj); + + *bench_ghz = cpu_ghz; + } + else + { + double cpu_auto_ghz_before; + double cpu_auto_ghz_after; + double nsecs_per_iteration; + double diff; + + /* Perform measurement with CPU frequency autodetection. */ + + do + { + /* Repeat measurement until CPU turbo frequency has stabilized. */ + + cpu_auto_ghz_before = get_auto_ghz (); + + nsecs_per_iteration = slope_benchmark (obj); + + cpu_auto_ghz_after = get_auto_ghz (); + + diff = 1.0 - (cpu_auto_ghz_before / cpu_auto_ghz_after); + diff = diff < 0 ? 
-diff : diff; + } + while (diff > 5e-5); + + ret = nsecs_per_iteration; + + *bench_ghz = cpu_auto_ghz_after; + } + + return ret; +} + /********************************************************** Printing results. */ @@ -476,29 +596,34 @@ double_to_str (char *out, size_t outlen, double value) fmt = "%.3f"; else if (value < 100.0) fmt = "%.2f"; - else + else if (value < 1000.0) fmt = "%.1f"; + else + fmt = "%.0f"; snprintf (out, outlen, fmt, value); } static void -bench_print_result_csv (double nsecs_per_byte) +bench_print_result_csv (double nsecs_per_byte, double bench_ghz) { double cycles_per_byte, mbytes_per_sec; char nsecpbyte_buf[16]; char mbpsec_buf[16]; char cpbyte_buf[16]; + char mhz_buf[16]; *cpbyte_buf = 0; + *mhz_buf = 0; double_to_str (nsecpbyte_buf, sizeof (nsecpbyte_buf), nsecs_per_byte); /* If user didn't provide CPU speed, we cannot show cycles/byte results. */ - if (cpu_ghz > 0.0) + if (bench_ghz > 0.0) { - cycles_per_byte = nsecs_per_byte * cpu_ghz; + cycles_per_byte = nsecs_per_byte * bench_ghz; double_to_str (cpbyte_buf, sizeof (cpbyte_buf), cycles_per_byte); + double_to_str (mhz_buf, sizeof (mhz_buf), bench_ghz * 1000); } mbytes_per_sec = @@ -506,50 +631,76 @@ bench_print_result_csv (double nsecs_per_byte) double_to_str (mbpsec_buf, sizeof (mbpsec_buf), mbytes_per_sec); /* We print two empty fields to allow for future enhancements. */ - printf ("%s,%s,%s,,,%s,ns/B,%s,MiB/s,%s,c/B\n", - current_section_name, - current_algo_name? current_algo_name : "", - current_mode_name? current_mode_name : "", - nsecpbyte_buf, - mbpsec_buf, - cpbyte_buf); - + if (auto_ghz) + { + printf ("%s,%s,%s,,,%s,ns/B,%s,MiB/s,%s,c/B,%s,Mhz\n", + current_section_name, + current_algo_name? current_algo_name : "", + current_mode_name? current_mode_name : "", + nsecpbyte_buf, + mbpsec_buf, + cpbyte_buf, + mhz_buf); + } + else + { + printf ("%s,%s,%s,,,%s,ns/B,%s,MiB/s,%s,c/B\n", + current_section_name, + current_algo_name? current_algo_name : "", + current_mode_name? current_mode_name : "", + nsecpbyte_buf, + mbpsec_buf, + cpbyte_buf); + } } static void -bench_print_result_std (double nsecs_per_byte) +bench_print_result_std (double nsecs_per_byte, double bench_ghz) { double cycles_per_byte, mbytes_per_sec; char nsecpbyte_buf[16]; char mbpsec_buf[16]; char cpbyte_buf[16]; + char mhz_buf[16]; double_to_str (nsecpbyte_buf, sizeof (nsecpbyte_buf), nsecs_per_byte); /* If user didn't provide CPU speed, we cannot show cycles/byte results. 
*/ - if (cpu_ghz > 0.0) + if (bench_ghz > 0.0) { - cycles_per_byte = nsecs_per_byte * cpu_ghz; + cycles_per_byte = nsecs_per_byte * bench_ghz; double_to_str (cpbyte_buf, sizeof (cpbyte_buf), cycles_per_byte); + double_to_str (mhz_buf, sizeof (mhz_buf), bench_ghz * 1000); } else - strcpy (cpbyte_buf, "-"); + { + strcpy (cpbyte_buf, "-"); + strcpy (mhz_buf, "-"); + } mbytes_per_sec = (1000.0 * 1000.0 * 1000.0) / (nsecs_per_byte * 1024 * 1024); double_to_str (mbpsec_buf, sizeof (mbpsec_buf), mbytes_per_sec); - printf ("%9s ns/B %9s MiB/s %9s c/B\n", - nsecpbyte_buf, mbpsec_buf, cpbyte_buf); + if (auto_ghz) + { + printf ("%9s ns/B %9s MiB/s %9s c/B %9s\n", + nsecpbyte_buf, mbpsec_buf, cpbyte_buf, mhz_buf); + } + else + { + printf ("%9s ns/B %9s MiB/s %9s c/B\n", + nsecpbyte_buf, mbpsec_buf, cpbyte_buf); + } } static void -bench_print_result (double nsecs_per_byte) +bench_print_result (double nsecs_per_byte, double bench_ghz) { if (csv_mode) - bench_print_result_csv (nsecs_per_byte); + bench_print_result_csv (nsecs_per_byte, bench_ghz); else - bench_print_result_std (nsecs_per_byte); + bench_print_result_std (nsecs_per_byte, bench_ghz); } static void @@ -578,8 +729,13 @@ bench_print_header (int algo_width, const char *algo_name) printf (" %-*s | ", -algo_width, algo_name); else printf (" %-*s | ", algo_width, algo_name); - printf ("%14s %15s %13s\n", "nanosecs/byte", "mebibytes/sec", - "cycles/byte"); + + if (auto_ghz) + printf ("%14s %15s %13s %9s\n", "nanosecs/byte", "mebibytes/sec", + "cycles/byte", "auto Mhz"); + else + printf ("%14s %15s %13s\n", "nanosecs/byte", "mebibytes/sec", + "cycles/byte"); } } @@ -684,7 +840,7 @@ bench_encrypt_init (struct bench_obj *obj) exit (1); } - obj->priv = hd; + obj->hd = hd; return 0; } @@ -692,7 +848,7 @@ bench_encrypt_init (struct bench_obj *obj) static void bench_encrypt_free (struct bench_obj *obj) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; gcry_cipher_close (hd); } @@ -700,7 +856,7 @@ bench_encrypt_free (struct bench_obj *obj) static void bench_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; err = gcry_cipher_encrypt (hd, buf, buflen, buf, buflen); @@ -716,7 +872,7 @@ bench_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) static void bench_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; err = gcry_cipher_decrypt (hd, buf, buflen, buf, buflen); @@ -790,7 +946,7 @@ bench_xts_encrypt_init (struct bench_obj *obj) exit (1); } - obj->priv = hd; + obj->hd = hd; return 0; } @@ -798,7 +954,7 @@ bench_xts_encrypt_init (struct bench_obj *obj) static void bench_xts_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; unsigned int pos; static const char tweak[16] = { 0xff, 0xff, 0xfe, }; size_t sectorlen = obj->step_size; @@ -825,7 +981,7 @@ bench_xts_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) static void bench_xts_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; unsigned int pos; static const char tweak[16] = { 0xff, 0xff, 0xfe, }; size_t sectorlen = obj->step_size; @@ -865,7 +1021,7 @@ static struct bench_ops xts_decrypt_ops = { static void bench_ccm_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - 
gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[8]; char nonce[11] = { 0x80, 0x01, }; @@ -909,7 +1065,7 @@ bench_ccm_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) static void bench_ccm_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[8] = { 0, }; char nonce[11] = { 0x80, 0x01, }; @@ -956,7 +1112,7 @@ static void bench_ccm_authenticate_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[8] = { 0, }; char nonce[11] = { 0x80, 0x01, }; @@ -1030,7 +1186,7 @@ static void bench_aead_encrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen, const char *nonce, size_t noncelen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[16]; @@ -1060,7 +1216,7 @@ static void bench_aead_decrypt_do_bench (struct bench_obj *obj, void *buf, size_t buflen, const char *nonce, size_t noncelen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[16] = { 0, }; @@ -1093,7 +1249,7 @@ bench_aead_authenticate_do_bench (struct bench_obj *obj, void *buf, size_t buflen, const char *nonce, size_t noncelen) { - gcry_cipher_hd_t hd = obj->priv; + gcry_cipher_hd_t hd = obj->hd; int err; char tag[16] = { 0, }; char data = 0xff; @@ -1360,6 +1516,7 @@ cipher_bench_one (int algo, struct bench_cipher_mode *pmode) struct bench_cipher_mode mode = *pmode; struct bench_obj obj = { 0 }; double result; + double bench_ghz; unsigned int blklen; mode.algo = algo; @@ -1404,9 +1561,9 @@ cipher_bench_one (int algo, struct bench_cipher_mode *pmode) obj.ops = mode.ops; obj.priv = &mode; - result = do_slope_benchmark (&obj); + result = do_slope_benchmark (&obj, &bench_ghz); - bench_print_result (result); + bench_print_result (result, bench_ghz); } @@ -1483,7 +1640,7 @@ bench_hash_init (struct bench_obj *obj) exit (1); } - obj->priv = hd; + obj->hd = hd; return 0; } @@ -1491,7 +1648,7 @@ bench_hash_init (struct bench_obj *obj) static void bench_hash_free (struct bench_obj *obj) { - gcry_md_hd_t hd = obj->priv; + gcry_md_hd_t hd = obj->hd; gcry_md_close (hd); } @@ -1499,7 +1656,7 @@ bench_hash_free (struct bench_obj *obj) static void bench_hash_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_md_hd_t hd = obj->priv; + gcry_md_hd_t hd = obj->hd; gcry_md_reset (hd); gcry_md_write (hd, buf, buflen); @@ -1524,6 +1681,7 @@ hash_bench_one (int algo, struct bench_hash_mode *pmode) { struct bench_hash_mode mode = *pmode; struct bench_obj obj = { 0 }; + double bench_ghz; double result; mode.algo = algo; @@ -1536,9 +1694,9 @@ hash_bench_one (int algo, struct bench_hash_mode *pmode) obj.ops = mode.ops; obj.priv = &mode; - result = do_slope_benchmark (&obj); + result = do_slope_benchmark (&obj, &bench_ghz); - bench_print_result (result); + bench_print_result (result, bench_ghz); } static void @@ -1645,7 +1803,7 @@ bench_mac_init (struct bench_obj *obj) break; } - obj->priv = hd; + obj->hd = hd; free (key); return 0; @@ -1654,7 +1812,7 @@ bench_mac_init (struct bench_obj *obj) static void bench_mac_free (struct bench_obj *obj) { - gcry_mac_hd_t hd = obj->priv; + gcry_mac_hd_t hd = obj->hd; gcry_mac_close (hd); } @@ -1662,7 +1820,7 @@ bench_mac_free (struct bench_obj *obj) static void bench_mac_do_bench (struct bench_obj *obj, void *buf, size_t buflen) { - gcry_mac_hd_t hd = obj->priv; + gcry_mac_hd_t hd = 
obj->hd; size_t bs; char b; @@ -1690,6 +1848,7 @@ mac_bench_one (int algo, struct bench_mac_mode *pmode) { struct bench_mac_mode mode = *pmode; struct bench_obj obj = { 0 }; + double bench_ghz; double result; mode.algo = algo; @@ -1702,9 +1861,9 @@ mac_bench_one (int algo, struct bench_mac_mode *pmode) obj.ops = mode.ops; obj.priv = &mode; - result = do_slope_benchmark (&obj); + result = do_slope_benchmark (&obj, &bench_ghz); - bench_print_result (result); + bench_print_result (result, bench_ghz); } static void @@ -1807,9 +1966,11 @@ kdf_bench_one (int algo, int subalgo) struct bench_obj obj = { 0 }; double nsecs_per_iteration; double cycles_per_iteration; + double bench_ghz; char algo_name[32]; char nsecpiter_buf[16]; char cpiter_buf[16]; + char mhz_buf[16]; mode.algo = algo; mode.subalgo = subalgo; @@ -1843,31 +2004,45 @@ kdf_bench_one (int algo, int subalgo) obj.ops = mode.ops; obj.priv = &mode; - nsecs_per_iteration = do_slope_benchmark (&obj); + nsecs_per_iteration = do_slope_benchmark (&obj, &bench_ghz); strcpy(cpiter_buf, csv_mode ? "" : "-"); + strcpy(mhz_buf, csv_mode ? "" : "-"); double_to_str (nsecpiter_buf, sizeof (nsecpiter_buf), nsecs_per_iteration); /* If user didn't provide CPU speed, we cannot show cycles/iter results. */ - if (cpu_ghz > 0.0) + if (bench_ghz > 0.0) { - cycles_per_iteration = nsecs_per_iteration * cpu_ghz; + cycles_per_iteration = nsecs_per_iteration * bench_ghz; double_to_str (cpiter_buf, sizeof (cpiter_buf), cycles_per_iteration); + double_to_str (mhz_buf, sizeof (mhz_buf), bench_ghz * 1000); } if (csv_mode) { - printf ("%s,%s,%s,,,,,,,,,%s,ns/iter,%s,c/iter\n", - current_section_name, - current_algo_name ? current_algo_name : "", - current_mode_name ? current_mode_name : "", - nsecpiter_buf, - cpiter_buf); + if (auto_ghz) + printf ("%s,%s,%s,,,,,,,,,%s,ns/iter,%s,c/iter,%s,Mhz\n", + current_section_name, + current_algo_name ? current_algo_name : "", + current_mode_name ? current_mode_name : "", + nsecpiter_buf, + cpiter_buf, + mhz_buf); + else + printf ("%s,%s,%s,,,,,,,,,%s,ns/iter,%s,c/iter\n", + current_section_name, + current_algo_name ? current_algo_name : "", + current_mode_name ? current_mode_name : "", + nsecpiter_buf, + cpiter_buf); } else { - printf ("%14s %13s\n", nsecpiter_buf, cpiter_buf); + if (auto_ghz) + printf ("%14s %13s %9s\n", nsecpiter_buf, cpiter_buf, mhz_buf); + else + printf ("%14s %13s\n", nsecpiter_buf, cpiter_buf); } } @@ -1882,7 +2057,10 @@ kdf_bench (char **argv, int argc) if (!csv_mode) { printf (" %-*s | ", 24, ""); - printf ("%14s %13s\n", "nanosecs/iter", "cycles/iter"); + if (auto_ghz) + printf ("%14s %13s %9s\n", "nanosecs/iter", "cycles/iter", "auto Mhz"); + else + printf ("%14s %13s\n", "nanosecs/iter", "cycles/iter"); } if (argv && argc) @@ -1923,7 +2101,8 @@ print_help (void) "", " options:", " --cpu-mhz Set CPU speed for calculating cycles", - " per bytes results.", + " per bytes results. 
Set as \"auto\"", + " for auto-detection of CPU speed.", " --disable-hwf Disable hardware acceleration feature(s)", " for benchmarking.", " --repetitions Use N repetitions (default " @@ -2039,8 +2218,15 @@ main (int argc, char **argv) argv++; if (argc) { - cpu_ghz = atof (*argv); - cpu_ghz /= 1000; /* Mhz => Ghz */ + if (!strcmp (*argv, "auto")) + { + auto_ghz = 1; + } + else + { + cpu_ghz = atof (*argv); + cpu_ghz /= 1000; /* Mhz => Ghz */ + } argc--; argv++; commit 3841b23c0ccb24d555b7570083bba958e3126d26 Author: Jussi Kivilinna Date: Thu Mar 22 21:42:23 2018 +0200 _gcry_burn_stack: use memset for clearing memory * src/misc.c (__gcry_burn_stack) [HAVE_VLA]: Use 'memset' for clearing stack. -- Patch switches stacking burning to use faster memset instead of wipememory. Memset is accessed through volatile function pointer, so that compiler will not optimize away the call. Signed-off-by: Jussi Kivilinna diff --git a/src/misc.c b/src/misc.c index 002a84f..47d2dc7 100644 --- a/src/misc.c +++ b/src/misc.c @@ -501,11 +501,12 @@ void __gcry_burn_stack (unsigned int bytes) { #ifdef HAVE_VLA + static void *(*volatile memset_ptr)(void *, int, size_t) = (void *)memset; /* (bytes == 0 ? 1 : bytes) == (!bytes + bytes) */ unsigned int buflen = ((!bytes + bytes) + 63) & ~63; - volatile char buf[buflen]; + char buf[buflen]; - wipememory (buf, sizeof buf); + memset_ptr (buf, 0, sizeof buf); #else volatile char buf[64]; commit a1127dbbada4302abf09eec90fbaceca87bfcdf0 Author: Jussi Kivilinna Date: Thu Mar 22 21:42:22 2018 +0200 Improve constant-time buffer compare * cipher/bufhelp.h (buf_eq_const): Rewrite logic. -- New implementation for constant-time buffer comparing that avoids generating conditional code in comparison loop. Signed-off-by: Jussi Kivilinna diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h index b854bc0..83d3f53 100644 --- a/cipher/bufhelp.h +++ b/cipher/bufhelp.h @@ -290,13 +290,19 @@ buf_eq_const(const void *_a, const void *_b, size_t len) { const byte *a = _a; const byte *b = _b; - size_t diff, i; + int ab, ba; + size_t i; /* Constant-time compare. */ - for (i = 0, diff = 0; i < len; i++) - diff -= !!(a[i] - b[i]); + for (i = 0, ab = 0, ba = 0; i < len; i++) + { + /* If a[i] != b[i], either ab or ba will be negative. */ + ab |= a[i] - b[i]; + ba |= b[i] - a[i]; + } - return !diff; + /* 'ab | ba' is negative when buffers are not equal. */ + return (ab | ba) >= 0; } ----------------------------------------------------------------------- Summary of changes: cipher/bufhelp.h | 14 ++- src/misc.c | 5 +- tests/bench-slope.c | 312 +++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 262 insertions(+), 69 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From jussi.kivilinna at iki.fi Thu Mar 22 21:14:57 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 22 Mar 2018 22:14:57 +0200 Subject: [PATCH] tests/aeswrap: add in-place encryption/decryption testing Message-ID: <152174969706.13724.8178957584763301213.stgit@localhost.localdomain> * tests/aeswrap.c (check): Rename to... (check_one): ...this and add in-place testing. (check): New. 
-- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/tests/aeswrap.c b/tests/aeswrap.c index 90add111c..dbbd7dd9a 100644 --- a/tests/aeswrap.c +++ b/tests/aeswrap.c @@ -31,10 +31,11 @@ static void -check (int algo, - const void *kek, size_t keklen, - const void *data, size_t datalen, - const void *expected, size_t expectedlen) +check_one (int algo, + const void *kek, size_t keklen, + const void *data, size_t datalen, + const void *expected, size_t expectedlen, + int inplace) { gcry_error_t err; gcry_cipher_hd_t hd; @@ -57,9 +58,19 @@ check (int algo, outbuflen = datalen + 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, data, datalen); + err = gcry_cipher_encrypt (hd, outbuf, outbuflen, outbuf, datalen); + } else - err = gcry_cipher_encrypt (hd, outbuf, outbuflen, data, datalen); + { + err = gcry_cipher_encrypt (hd, outbuf, outbuflen, data, datalen); + } + if (err) { fail ("gcry_cipher_encrypt failed: %s\n", gpg_strerror (err)); @@ -71,7 +82,7 @@ check (int algo, const unsigned char *s; int i; - fail ("mismatch at encryption!\n"); + fail ("mismatch at encryption!%s\n", inplace ? " (inplace)" : ""); fprintf (stderr, "computed: "); for (i = 0; i < outbuflen; i++) fprintf (stderr, "%02x ", outbuf[i]); @@ -84,9 +95,19 @@ check (int algo, outbuflen = expectedlen - 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, expected, expectedlen); + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, outbuf, expectedlen); + } else - err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + { + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + } + if (err) { fail ("gcry_cipher_decrypt failed: %s\n", gpg_strerror (err)); @@ -98,7 +119,7 @@ check (int algo, const unsigned char *s; int i; - fail ("mismatch at decryption!\n"); + fail ("mismatch at decryption!%s\n", inplace ? " (inplace)" : ""); fprintf (stderr, "computed: "); for (i = 0; i < outbuflen; i++) fprintf (stderr, "%02x ", outbuf[i]); @@ -113,9 +134,19 @@ check (int algo, outbuflen = expectedlen - 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, expected, expectedlen); + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, outbuf, expectedlen); + } else - err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + { + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + } + if (err) { fail ("gcry_cipher_decrypt(2) failed: %s\n", gpg_strerror (err)); @@ -123,14 +154,24 @@ check (int algo, } if (outbuflen != datalen || memcmp (outbuf, data, datalen)) - fail ("mismatch at decryption(2)!\n"); + fail ("mismatch at decryption(2)!%s\n", inplace ? " (inplace)" : ""); - /* And once ore without a key reset. */ + /* And once more without a key reset. 
*/ outbuflen = expectedlen - 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, expected, expectedlen); + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, outbuf, expectedlen); + } else - err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + { + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + } + if (err) { fail ("gcry_cipher_decrypt(3) failed: %s\n", gpg_strerror (err)); @@ -138,12 +179,23 @@ check (int algo, } if (outbuflen != datalen || memcmp (outbuf, data, datalen)) - fail ("mismatch at decryption(3)!\n"); + fail ("mismatch at decryption(3)!%s\n", inplace ? " (inplace)" : ""); gcry_cipher_close (hd); } +static void +check (int algo, + const void *kek, size_t keklen, + const void *data, size_t datalen, + const void *expected, size_t expectedlen) +{ + check_one (algo, kek, keklen, data, datalen, expected, expectedlen, 0); + check_one (algo, kek, keklen, data, datalen, expected, expectedlen, 1); +} + + static void check_all (void) { From cvs at cvs.gnupg.org Thu Mar 22 21:24:03 2018 From: cvs at cvs.gnupg.org (by Jussi Kivilinna) Date: Thu, 22 Mar 2018 21:24:03 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-51-g885f031 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 885f031fbd17abc1c0fedbb98df22823b647fc11 (commit) via 330ec66e0babdabb658dc7d6db78f37b2a1b996e (commit) from 617f5e746f8295cc36d1002c8c53edc95d04d0f6 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 885f031fbd17abc1c0fedbb98df22823b647fc11 Author: Jussi Kivilinna Date: Thu Mar 22 21:54:20 2018 +0200 tests/aeswrap: add in-place encryption/decryption testing * tests/aeswrap.c (check): Rename to... (check_one): ...this and add in-place testing. (check): New. -- Signed-off-by: Jussi Kivilinna diff --git a/tests/aeswrap.c b/tests/aeswrap.c index 90add11..dbbd7dd 100644 --- a/tests/aeswrap.c +++ b/tests/aeswrap.c @@ -31,10 +31,11 @@ static void -check (int algo, - const void *kek, size_t keklen, - const void *data, size_t datalen, - const void *expected, size_t expectedlen) +check_one (int algo, + const void *kek, size_t keklen, + const void *data, size_t datalen, + const void *expected, size_t expectedlen, + int inplace) { gcry_error_t err; gcry_cipher_hd_t hd; @@ -57,9 +58,19 @@ check (int algo, outbuflen = datalen + 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, data, datalen); + err = gcry_cipher_encrypt (hd, outbuf, outbuflen, outbuf, datalen); + } else - err = gcry_cipher_encrypt (hd, outbuf, outbuflen, data, datalen); + { + err = gcry_cipher_encrypt (hd, outbuf, outbuflen, data, datalen); + } + if (err) { fail ("gcry_cipher_encrypt failed: %s\n", gpg_strerror (err)); @@ -71,7 +82,7 @@ check (int algo, const unsigned char *s; int i; - fail ("mismatch at encryption!\n"); + fail ("mismatch at encryption!%s\n", inplace ? 
" (inplace)" : ""); fprintf (stderr, "computed: "); for (i = 0; i < outbuflen; i++) fprintf (stderr, "%02x ", outbuf[i]); @@ -84,9 +95,19 @@ check (int algo, outbuflen = expectedlen - 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, expected, expectedlen); + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, outbuf, expectedlen); + } else - err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + { + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + } + if (err) { fail ("gcry_cipher_decrypt failed: %s\n", gpg_strerror (err)); @@ -98,7 +119,7 @@ check (int algo, const unsigned char *s; int i; - fail ("mismatch at decryption!\n"); + fail ("mismatch at decryption!%s\n", inplace ? " (inplace)" : ""); fprintf (stderr, "computed: "); for (i = 0; i < outbuflen; i++) fprintf (stderr, "%02x ", outbuf[i]); @@ -113,9 +134,19 @@ check (int algo, outbuflen = expectedlen - 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, expected, expectedlen); + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, outbuf, expectedlen); + } else - err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + { + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + } + if (err) { fail ("gcry_cipher_decrypt(2) failed: %s\n", gpg_strerror (err)); @@ -123,14 +154,24 @@ check (int algo, } if (outbuflen != datalen || memcmp (outbuf, data, datalen)) - fail ("mismatch at decryption(2)!\n"); + fail ("mismatch at decryption(2)!%s\n", inplace ? " (inplace)" : ""); - /* And once ore without a key reset. */ + /* And once more without a key reset. */ outbuflen = expectedlen - 8; if (outbuflen > sizeof outbuf) - err = gpg_error (GPG_ERR_INTERNAL); + { + err = gpg_error (GPG_ERR_INTERNAL); + } + else if (inplace) + { + memcpy (outbuf, expected, expectedlen); + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, outbuf, expectedlen); + } else - err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + { + err = gcry_cipher_decrypt (hd, outbuf, outbuflen, expected, expectedlen); + } + if (err) { fail ("gcry_cipher_decrypt(3) failed: %s\n", gpg_strerror (err)); @@ -138,13 +179,24 @@ check (int algo, } if (outbuflen != datalen || memcmp (outbuf, data, datalen)) - fail ("mismatch at decryption(3)!\n"); + fail ("mismatch at decryption(3)!%s\n", inplace ? " (inplace)" : ""); gcry_cipher_close (hd); } static void +check (int algo, + const void *kek, size_t keklen, + const void *data, size_t datalen, + const void *expected, size_t expectedlen) +{ + check_one (algo, kek, keklen, data, datalen, expected, expectedlen, 0); + check_one (algo, kek, keklen, data, datalen, expected, expectedlen, 1); +} + + +static void check_all (void) { if (verbose) commit 330ec66e0babdabb658dc7d6db78f37b2a1b996e Author: Stephan Mueller Date: Mon Mar 12 22:24:37 2018 +0100 AES-KW: fix in-place encryption * cipher/cipher-aeswrap.c: move memmove call before KW IV setting -- In case AES-KW in-place encryption is performed, the plaintext must be moved to the correct destination location before the first semiblock of the destination buffer is modified. Without the patch, the first semiblock of the plaintext is overwritten with a6a6a6a6a6a6a6a6. 
Signed-off-by: Stephan Mueller diff --git a/cipher/cipher-aeswrap.c b/cipher/cipher-aeswrap.c index 698742d..a8d0e03 100644 --- a/cipher/cipher-aeswrap.c +++ b/cipher/cipher-aeswrap.c @@ -70,6 +70,9 @@ _gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c, a = outbuf; /* We store A directly in OUTBUF. */ b = c->u_ctr.ctr; /* B is also used to concatenate stuff. */ + /* Copy the inbuf to the outbuf. */ + memmove (r+8, inbuf, inbuflen); + /* If an IV has been set we use that IV as the Alternative Initial Value; if it has not been set we use the standard value. */ if (c->marks.iv) @@ -77,9 +80,6 @@ _gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c, else memset (a, 0xa6, 8); - /* Copy the inbuf to the outbuf. */ - memmove (r+8, inbuf, inbuflen); - memset (t, 0, sizeof t); /* t := 0. */ for (j = 0; j <= 5; j++) ----------------------------------------------------------------------- Summary of changes: cipher/cipher-aeswrap.c | 6 ++-- tests/aeswrap.c | 86 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 20 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From jussi.kivilinna at iki.fi Thu Mar 22 21:29:15 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 22 Mar 2018 22:29:15 +0200 Subject: [PATCH] AES-KW: fix in-place encryption In-Reply-To: <10870564.SVcAGkk7tg@tauon.chronox.de> References: <4386046.jcxHWR5DvT@tauon.chronox.de> <10870564.SVcAGkk7tg@tauon.chronox.de> Message-ID: <89d5b0b0-b7f3-8c2f-34b2-d0cec3e378ab@iki.fi> On 12.03.2018 23:24, Stephan Mueller wrote: > In case AES-KW in-place encryption is performed, the plaintext must be > moved to the correct destination location before the first semiblock of > the destination buffer is modified. Without the patch, the first > semiblock of the plaintext is overwritten with a6a6a6a6a6a6a6a6. > > * cipher/cipher-aeswrap.c: move memmove call before KW IV setting > Thanks, applied and pushed. -Jussi From martin at martin.st Thu Mar 22 22:32:36 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 23:32:36 +0200 Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 Message-ID: <1521754360-5806-1-git-send-email-martin@martin.st> * random/rndw32.c: Change _WIN64 ifdef into __x86_64__. -- This fixes building this file for windows on aarch64. Signed-off-by: Martin Storsjö --- random/rndw32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/random/rndw32.c b/random/rndw32.c index 7e9ac50..08a8867 100644 --- a/random/rndw32.c +++ b/random/rndw32.c @@ -986,7 +986,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, On AMD64, TSC is always available and intrinsic is provided for accessing it.
*/ -#ifdef __WIN64__ +#ifdef __x86_64__ { unsigned __int64 aint64; @@ -1024,7 +1024,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, (*add) (&aword, sizeof (aword), origin ); } } -#endif /*__WIN64__*/ +#endif /*__x86_64__*/ } -- 2.7.4 From martin at martin.st Thu Mar 22 22:32:38 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 23:32:38 +0200 Subject: [PATCH 3/5] aarch64: Fix assembling chacha20-aarch64.S with clang/llvm In-Reply-To: <1521754360-5806-1-git-send-email-martin@martin.st> References: <1521754360-5806-1-git-send-email-martin@martin.st> Message-ID: <1521754360-5806-3-git-send-email-martin@martin.st> * cipher/chacha20-aarch64.S: Remove superfluous lane counts. -- When referring to a specific lane, one doesn't need to specify the total number of lanes of the register. With GNU binutils, both forms are accepted, while clang/llvm rejects the form with the unnecessary number of lanes. Signed-off-by: Martin Storsjö --- cipher/chacha20-aarch64.S | 60 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 739ddde..5990a08 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -170,27 +170,27 @@ _gcry_chacha20_aarch64_blocks4: mov ROUND, #20; ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS]; - dup X12.4s, X15.4s[0]; - dup X13.4s, X15.4s[1]; + dup X12.4s, X15.s[0]; + dup X13.4s, X15.s[1]; ldr CTR, [INPUT_CTR]; add X12.4s, X12.4s, VCTR.4s; - dup X0.4s, VTMP1.4s[0]; - dup X1.4s, VTMP1.4s[1]; - dup X2.4s, VTMP1.4s[2]; - dup X3.4s, VTMP1.4s[3]; - dup X14.4s, X15.4s[2]; + dup X0.4s, VTMP1.s[0]; + dup X1.4s, VTMP1.s[1]; + dup X2.4s, VTMP1.s[2]; + dup X3.4s, VTMP1.s[3]; + dup X14.4s, X15.s[2]; cmhi VTMP0.4s, VCTR.4s, X12.4s; - dup X15.4s, X15.4s[3]; + dup X15.4s, X15.s[3]; add CTR, CTR, #4; /* Update counter */ - dup X4.4s, VTMP2.4s[0]; - dup X5.4s, VTMP2.4s[1]; - dup X6.4s, VTMP2.4s[2]; - dup X7.4s, VTMP2.4s[3]; + dup X4.4s, VTMP2.s[0]; + dup X5.4s, VTMP2.s[1]; + dup X6.4s, VTMP2.s[2]; + dup X7.4s, VTMP2.s[3]; sub X13.4s, X13.4s, VTMP0.4s; - dup X8.4s, VTMP3.4s[0]; - dup X9.4s, VTMP3.4s[1]; - dup X10.4s, VTMP3.4s[2]; - dup X11.4s, VTMP3.4s[3]; + dup X8.4s, VTMP3.s[0]; + dup X9.4s, VTMP3.s[1]; + dup X10.4s, VTMP3.s[2]; + dup X11.4s, VTMP3.s[3]; mov X12_TMP.16b, X12.16b; mov X13_TMP.16b, X13.16b; str CTR, [INPUT_CTR]; @@ -208,19 +208,19 @@ _gcry_chacha20_aarch64_blocks4: PLUS(X12, X12_TMP); /* INPUT + 12 * 4 + counter */ PLUS(X13, X13_TMP); /* INPUT + 13 * 4 + counter */ - dup VTMP2.4s, VTMP0.4s[0]; /* INPUT + 0 * 4 */ - dup VTMP3.4s, VTMP0.4s[1]; /* INPUT + 1 * 4 */ - dup X12_TMP.4s, VTMP0.4s[2]; /* INPUT + 2 * 4 */ - dup X13_TMP.4s, VTMP0.4s[3]; /* INPUT + 3 * 4 */ + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */ PLUS(X0, VTMP2); PLUS(X1, VTMP3); PLUS(X2, X12_TMP); PLUS(X3, X13_TMP); - dup VTMP2.4s, VTMP1.4s[0]; /* INPUT + 4 * 4 */ - dup VTMP3.4s, VTMP1.4s[1]; /* INPUT + 5 * 4 */ - dup X12_TMP.4s, VTMP1.4s[2]; /* INPUT + 6 * 4 */ - dup X13_TMP.4s, VTMP1.4s[3]; /* INPUT + 7 * 4 */ + dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */ + dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */ + dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */ + dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */ ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS]; mov INPUT_POS, INPUT; PLUS(X4, VTMP2); @@ -228,12
+228,12 @@ _gcry_chacha20_aarch64_blocks4: PLUS(X6, X12_TMP); PLUS(X7, X13_TMP); - dup VTMP2.4s, VTMP0.4s[0]; /* INPUT + 8 * 4 */ - dup VTMP3.4s, VTMP0.4s[1]; /* INPUT + 9 * 4 */ - dup X12_TMP.4s, VTMP0.4s[2]; /* INPUT + 10 * 4 */ - dup X13_TMP.4s, VTMP0.4s[3]; /* INPUT + 11 * 4 */ - dup VTMP0.4s, VTMP1.4s[2]; /* INPUT + 14 * 4 */ - dup VTMP1.4s, VTMP1.4s[3]; /* INPUT + 15 * 4 */ + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */ + dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */ + dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */ PLUS(X8, VTMP2); PLUS(X9, VTMP3); PLUS(X10, X12_TMP); -- 2.7.4 From martin at martin.st Thu Mar 22 22:32:40 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 23:32:40 +0200 Subject: [PATCH 5/5] aarch64: Enable building the aarch64 cipher assembly for windows In-Reply-To: <1521754360-5806-1-git-send-email-martin@martin.st> References: <1521754360-5806-1-git-send-email-martin@martin.st> Message-ID: <1521754360-5806-5-git-send-email-martin@martin.st> * cipher/asm-common-aarch64.h: New. * cipher/camellia-aarch64.S: Use ELF macro, use x19 instead of x18. * cipher/chacha20-aarch64.S: Use ELF macro, don't use GOT on windows. * cipher/cipher-gcm-armv8-aarch64-ce.S: Use ELF macro. * cipher/rijndael-aarch64.S: Use ELF macro. * cipher/rijndael-armv8-aarch64-ce.S: Use ELF macro. * cipher/sha1-armv8-aarch64-ce.S: Use ELF macro. * cipher/sha256-armv8-aarch64-ce.S: Use ELF macro. * cipher/twofish-aarch64.S: Use ELF macro. * configure.ac: Don't require .size and .type in aarch64 assembly check. -- Don't require .type and .size in configure; we can make them optional via a preprocessor macro. This is mostly a mechanical change, wrapping the .type and .size directives in an ELF() macro, with two actual manual changes (when targeting windows): - Don't load global symbols via a GOT (in chacha20) - Don't use the x18 register (in camellia); back up and restore x19 in the prologue/epilogue and use that instead. x18 is a platform-specific register; on linux, it's free to be used by user code, while it's reserved for platform use on windows and darwin. Always use x19 instead of x18 for consistency. Signed-off-by: Martin Storsjö --- This isn't strictly necessary for building libgcrypt for windows on aarch64; previously, configure concluded that the assembly can't be built since the .type and .size directives don't work. This just allows using more of the existing assembly routines. This also probably has the effect that the same assembly gets enabled when targeting darwin (iOS), but building with assembly enabled doesn't work for darwin anyway (even prior to this change, since darwin requires an extra leading underscore on all symbols, while the mpi/aarch64 code gets automatically enabled).
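To make the mechanism concrete, here is a minimal sketch of how the wrapped directives expand; the function name is a placeholder for illustration, not taken from the patch:

    #ifdef __ELF__
    # define ELF(...) __VA_ARGS__
    #else
    # define ELF(...) /*_*/
    #endif

    .text
    .globl example_func
    ELF(.type example_func,%function)    /* kept on ELF, dropped elsewhere */
    example_func:
            ret
    ELF(.size example_func,.-example_func)

On an ELF target the preprocessor passes the wrapped lines through unchanged, so GNU as still records the symbol type and size; when assembling for windows (PE/COFF) or darwin (Mach-O), both lines expand to nothing and the assembler never sees the unsupported directives.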
--- cipher/asm-common-aarch64.h | 32 ++++++++++++++++++++ cipher/camellia-aarch64.S | 23 ++++++++------ cipher/chacha20-aarch64.S | 12 ++++++-- cipher/cipher-gcm-armv8-aarch64-ce.S | 10 +++---- cipher/rijndael-aarch64.S | 10 +++---- cipher/rijndael-armv8-aarch64-ce.S | 58 ++++++++++++++++++------------------ cipher/sha1-armv8-aarch64-ce.S | 6 ++-- cipher/sha256-armv8-aarch64-ce.S | 6 ++-- cipher/twofish-aarch64.S | 10 +++---- configure.ac | 4 --- 10 files changed, 105 insertions(+), 66 deletions(-) create mode 100644 cipher/asm-common-aarch64.h diff --git a/cipher/asm-common-aarch64.h b/cipher/asm-common-aarch64.h new file mode 100644 index 0000000..814b7ad --- /dev/null +++ b/cipher/asm-common-aarch64.h @@ -0,0 +1,32 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsjö + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef GCRY_ASM_COMMON_AARCH64_H +#define GCRY_ASM_COMMON_AARCH64_H + +#include <config.h> + +#ifdef __ELF__ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* GCRY_ASM_COMMON_AARCH64_H */ diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 68d2a7d..c3cc463 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -19,7 +19,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -55,12 +55,12 @@ #define RT0 w15 #define RT1 w16 #define RT2 w17 -#define RT3 w18 +#define RT3 w19 #define xRT0 x15 #define xRT1 x16 #define xRT2 x17 -#define xRT3 x18 +#define xRT3 x19 #ifdef __AARCH64EL__ #define host_to_be(reg, rtmp) \ @@ -198,9 +198,10 @@ str_output_be(RDST, YL, YR, XL, XR, RT0, RT1); .globl _gcry_camellia_arm_encrypt_block -.type _gcry_camellia_arm_encrypt_block,@function; +ELF(.type _gcry_camellia_arm_encrypt_block,@function;) _gcry_camellia_arm_encrypt_block: + stp x19, x30, [sp, #-16]!
/* input: * x0: keytable * x1: dst @@ -227,6 +228,7 @@ _gcry_camellia_arm_encrypt_block: outunpack(24); + ldp x19, x30, [sp], #16 ret; .ltorg @@ -236,14 +238,16 @@ _gcry_camellia_arm_encrypt_block: outunpack(32); + ldp x19, x30, [sp], #16 ret; .ltorg -.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block; +ELF(.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;) .globl _gcry_camellia_arm_decrypt_block -.type _gcry_camellia_arm_decrypt_block,@function; +ELF(.type _gcry_camellia_arm_decrypt_block,@function;) _gcry_camellia_arm_decrypt_block: + stp x19, x30, [sp, #-16]! /* input: * x0: keytable * x1: dst @@ -271,6 +275,7 @@ _gcry_camellia_arm_decrypt_block: outunpack(0); + ldp x19, x30, [sp], #16 ret; .ltorg @@ -281,11 +286,11 @@ _gcry_camellia_arm_decrypt_block: b .Ldec_128; .ltorg -.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block; +ELF(.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;) /* Encryption/Decryption tables */ .globl _gcry_camellia_arm_tables -.type _gcry_camellia_arm_tables,@object; +ELF(.type _gcry_camellia_arm_tables,@object;) .balign 32 _gcry_camellia_arm_tables: .Lcamellia_sp1110: @@ -551,7 +556,7 @@ _gcry_camellia_arm_tables: .long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4 .long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7 .long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e -.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables; +ELF(.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 5990a08..3844d4e 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -27,7 +27,7 @@ * Public domain. */ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ @@ -38,9 +38,15 @@ .text +#ifdef _WIN32 +#define GET_DATA_POINTER(reg, name) \ + adrp reg, name ; \ + add reg, reg, #:lo12:name ; +#else #define GET_DATA_POINTER(reg, name) \ adrp reg, :got:name ; \ ldr reg, [reg, #:got_lo12:name] ; +#endif /* register macros */ #define INPUT x0 @@ -148,7 +154,7 @@ chacha20_data: .align 3 .globl _gcry_chacha20_aarch64_blocks4 -.type _gcry_chacha20_aarch64_blocks4,%function; +ELF(.type _gcry_chacha20_aarch64_blocks4,%function;) _gcry_chacha20_aarch64_blocks4: /* input: @@ -303,6 +309,6 @@ _gcry_chacha20_aarch64_blocks4: eor x0, x0, x0 ret -.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4; +ELF(.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4;) #endif diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S index 0cfaf1c..b6c4f59 100644 --- a/cipher/cipher-gcm-armv8-aarch64-ce.S +++ b/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -174,7 +174,7 @@ gcry_gcm_reduction_constant: */ .align 3 .globl _gcry_ghash_armv8_ce_pmull -.type _gcry_ghash_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_armv8_ce_pmull,%function;) _gcry_ghash_armv8_ce_pmull: /* input: * x0: gcm_key @@ -360,7 +360,7 @@ _gcry_ghash_armv8_ce_pmull: .Ldo_nothing: mov x0, #0 ret -.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull; +ELF(.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull;) /* @@ -368,7 +368,7 @@ _gcry_ghash_armv8_ce_pmull: */ .align 3 .globl _gcry_ghash_setup_armv8_ce_pmull -.type _gcry_ghash_setup_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_setup_armv8_ce_pmull,%function;) _gcry_ghash_setup_armv8_ce_pmull: /* input: * x0: gcm_key @@ -408,6 +408,6 @@ _gcry_ghash_setup_armv8_ce_pmull: st1 {rh5.16b-rh6.16b}, [x1] ret -.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull; +ELF(.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull;) #endif diff --git a/cipher/rijndael-aarch64.S b/cipher/rijndael-aarch64.S index e533bbe..aad7487 100644 --- a/cipher/rijndael-aarch64.S +++ b/cipher/rijndael-aarch64.S @@ -18,7 +18,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -206,7 +206,7 @@ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_encrypt_block -.type _gcry_aes_arm_encrypt_block,%function; +ELF(.type _gcry_aes_arm_encrypt_block,%function;) _gcry_aes_arm_encrypt_block: /* input: @@ -285,7 +285,7 @@ _gcry_aes_arm_encrypt_block: lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD); b .Lenc_done; -.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block; +ELF(.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;) #define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \ @@ -429,7 +429,7 @@ _gcry_aes_arm_encrypt_block: addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_decrypt_block -.type _gcry_aes_arm_decrypt_block,%function; +ELF(.type _gcry_aes_arm_decrypt_block,%function;) _gcry_aes_arm_decrypt_block: /* input: @@ -504,7 +504,7 @@ _gcry_aes_arm_decrypt_block: decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); b .Ldec_tail; -.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block; +ELF(.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__ */ diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S index 40097a7..5859557 100644 --- a/cipher/rijndael-armv8-aarch64-ce.S +++ b/cipher/rijndael-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -239,7 +239,7 @@ */ .align 3 .globl _gcry_aes_enc_armv8_ce -.type _gcry_aes_enc_armv8_ce,%function; +ELF(.type _gcry_aes_enc_armv8_ce,%function;) _gcry_aes_enc_armv8_ce: /* input: * x0: keysched @@ -291,7 +291,7 @@ _gcry_aes_enc_armv8_ce: CLEAR_REG(vk13) CLEAR_REG(vk14) b .Lenc1_tail -.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce; +ELF(.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce;) /* @@ -301,7 +301,7 @@ _gcry_aes_enc_armv8_ce: */ .align 3 .globl _gcry_aes_dec_armv8_ce -.type _gcry_aes_dec_armv8_ce,%function; +ELF(.type _gcry_aes_dec_armv8_ce,%function;) _gcry_aes_dec_armv8_ce: /* input: * x0: keysched @@ -353,7 +353,7 @@ _gcry_aes_dec_armv8_ce: CLEAR_REG(vk13) CLEAR_REG(vk14) b .Ldec1_tail -.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce; +ELF(.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce;) /* @@ -366,7 +366,7 @@ _gcry_aes_dec_armv8_ce: .align 3 .globl _gcry_aes_cbc_enc_armv8_ce -.type _gcry_aes_cbc_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_enc_armv8_ce,%function;) _gcry_aes_cbc_enc_armv8_ce: /* input: * x0: keysched @@ -419,7 +419,7 @@ _gcry_aes_cbc_enc_armv8_ce: .Lcbc_enc_skip: ret -.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce; +ELF(.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce;) /* * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched, @@ -430,7 +430,7 @@ _gcry_aes_cbc_enc_armv8_ce: .align 3 .globl _gcry_aes_cbc_dec_armv8_ce -.type _gcry_aes_cbc_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_dec_armv8_ce,%function;) _gcry_aes_cbc_dec_armv8_ce: /* input: * x0: keysched @@ -515,7 +515,7 @@ _gcry_aes_cbc_dec_armv8_ce: .Lcbc_dec_skip: ret -.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce; +ELF(.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce;) /* @@ -527,7 +527,7 @@ _gcry_aes_cbc_dec_armv8_ce: .align 3 .globl _gcry_aes_ctr_enc_armv8_ce -.type _gcry_aes_ctr_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ctr_enc_armv8_ce,%function;) _gcry_aes_ctr_enc_armv8_ce: /* input: * r0: keysched @@ -669,7 +669,7 @@ _gcry_aes_ctr_enc_armv8_ce: .Lctr_enc_skip: ret -.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce; +ELF(.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce;) /* @@ -681,7 +681,7 @@ _gcry_aes_ctr_enc_armv8_ce: .align 3 .globl _gcry_aes_cfb_enc_armv8_ce -.type _gcry_aes_cfb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_enc_armv8_ce,%function;) _gcry_aes_cfb_enc_armv8_ce: /* input: * r0: keysched @@ -732,7 +732,7 @@ _gcry_aes_cfb_enc_armv8_ce: .Lcfb_enc_skip: ret -.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce; +ELF(.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce;) /* @@ -744,7 +744,7 @@ _gcry_aes_cfb_enc_armv8_ce: .align 3 .globl _gcry_aes_cfb_dec_armv8_ce -.type _gcry_aes_cfb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_dec_armv8_ce,%function;) _gcry_aes_cfb_dec_armv8_ce: /* input: * r0: keysched @@ -829,7 +829,7 @@ _gcry_aes_cfb_dec_armv8_ce: .Lcfb_dec_skip: ret -.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce; +ELF(.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce;) /* @@ -846,7 +846,7 @@ _gcry_aes_cfb_dec_armv8_ce: .align 3 .globl _gcry_aes_ocb_enc_armv8_ce -.type _gcry_aes_ocb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_enc_armv8_ce,%function;) _gcry_aes_ocb_enc_armv8_ce: /* input: * x0: keysched @@ -979,7 +979,7 @@ _gcry_aes_ocb_enc_armv8_ce: CLEAR_REG(v16) ret -.size
_gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce; +ELF(.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce;) /* @@ -996,7 +996,7 @@ _gcry_aes_ocb_enc_armv8_ce: .align 3 .globl _gcry_aes_ocb_dec_armv8_ce -.type _gcry_aes_ocb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_dec_armv8_ce,%function;) _gcry_aes_ocb_dec_armv8_ce: /* input: * x0: keysched @@ -1129,7 +1129,7 @@ _gcry_aes_ocb_dec_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce; +ELF(.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce;) /* @@ -1145,7 +1145,7 @@ _gcry_aes_ocb_dec_armv8_ce: .align 3 .globl _gcry_aes_ocb_auth_armv8_ce -.type _gcry_aes_ocb_auth_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_auth_armv8_ce,%function;) _gcry_aes_ocb_auth_armv8_ce: /* input: * x0: keysched @@ -1273,7 +1273,7 @@ _gcry_aes_ocb_auth_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce; +ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;) /* @@ -1285,7 +1285,7 @@ _gcry_aes_ocb_auth_armv8_ce: .align 3 .globl _gcry_aes_xts_enc_armv8_ce -.type _gcry_aes_xts_enc_armv8_ce,%function; +ELF(.type _gcry_aes_xts_enc_armv8_ce,%function;) _gcry_aes_xts_enc_armv8_ce: /* input: * r0: keysched @@ -1410,7 +1410,7 @@ _gcry_aes_xts_enc_armv8_ce: .Lxts_enc_skip: ret -.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce; +ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;) /* @@ -1422,7 +1422,7 @@ _gcry_aes_xts_enc_armv8_ce: .align 3 .globl _gcry_aes_xts_dec_armv8_ce -.type _gcry_aes_xts_dec_armv8_ce,%function; +ELF(.type _gcry_aes_xts_dec_armv8_ce,%function;) _gcry_aes_xts_dec_armv8_ce: /* input: * r0: keysched @@ -1547,7 +1547,7 @@ _gcry_aes_xts_dec_armv8_ce: .Lxts_dec_skip: ret -.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce; +ELF(.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;) /* @@ -1555,7 +1555,7 @@ _gcry_aes_xts_dec_armv8_ce: */ .align 3 .globl _gcry_aes_sbox4_armv8_ce -.type _gcry_aes_sbox4_armv8_ce,%function; +ELF(.type _gcry_aes_sbox4_armv8_ce,%function;) _gcry_aes_sbox4_armv8_ce: /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in * Cryptology – CT-RSA 2015" for details. @@ -1568,7 +1568,7 @@ _gcry_aes_sbox4_armv8_ce: mov w0, v0.S[0] CLEAR_REG(v0) ret -.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce; +ELF(.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;) /* @@ -1576,13 +1576,13 @@ _gcry_aes_sbox4_armv8_ce: */ .align 3 .globl _gcry_aes_invmixcol_armv8_ce -.type _gcry_aes_invmixcol_armv8_ce,%function; +ELF(.type _gcry_aes_invmixcol_armv8_ce,%function;) _gcry_aes_invmixcol_armv8_ce: ld1 {v0.16b}, [x1] aesimc v0.16b, v0.16b st1 {v0.16b}, [x0] CLEAR_REG(v0) ret -.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce; +ELF(.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;) #endif diff --git a/cipher/sha1-armv8-aarch64-ce.S b/cipher/sha1-armv8-aarch64-ce.S index ec1810d..aeb67a1 100644 --- a/cipher/sha1-armv8-aarch64-ce.S +++ b/cipher/sha1-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -103,7 +103,7 @@ gcry_sha1_aarch64_ce_K_VEC: */ .align 3 .globl _gcry_sha1_transform_armv8_ce -.type _gcry_sha1_transform_armv8_ce,%function; +ELF(.type _gcry_sha1_transform_armv8_ce,%function;) _gcry_sha1_transform_armv8_ce: /* input: * x0: ctx, CTX @@ -199,6 +199,6 @@ _gcry_sha1_transform_armv8_ce: .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce; +ELF(.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce;) #endif diff --git a/cipher/sha256-armv8-aarch64-ce.S b/cipher/sha256-armv8-aarch64-ce.S index a4575da..6b3ad32 100644 --- a/cipher/sha256-armv8-aarch64-ce.S +++ b/cipher/sha256-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -113,7 +113,7 @@ gcry_sha256_aarch64_ce_K: */ .align 3 .globl _gcry_sha256_transform_armv8_ce -.type _gcry_sha256_transform_armv8_ce,%function; +ELF(.type _gcry_sha256_transform_armv8_ce,%function;) _gcry_sha256_transform_armv8_ce: /* input: * r0: ctx, CTX @@ -213,6 +213,6 @@ _gcry_sha256_transform_armv8_ce: .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce; +ELF(.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce;) #endif diff --git a/cipher/twofish-aarch64.S b/cipher/twofish-aarch64.S index 99c4675..adee412 100644 --- a/cipher/twofish-aarch64.S +++ b/cipher/twofish-aarch64.S @@ -18,7 +18,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -217,7 +217,7 @@ ror1(RD); .globl _gcry_twofish_arm_encrypt_block -.type _gcry_twofish_arm_encrypt_block,%function; +ELF(.type _gcry_twofish_arm_encrypt_block,%function;) _gcry_twofish_arm_encrypt_block: /* input: @@ -263,10 +263,10 @@ _gcry_twofish_arm_encrypt_block: ret; .ltorg -.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block; +ELF(.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;) .globl _gcry_twofish_arm_decrypt_block -.type _gcry_twofish_arm_decrypt_block,%function; +ELF(.type _gcry_twofish_arm_decrypt_block,%function;) _gcry_twofish_arm_decrypt_block: /* input: @@ -311,7 +311,7 @@ _gcry_twofish_arm_decrypt_block: str_output_le(RDST, RA, RB, RC, RD, RT0, RT1); ret; -.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block; +ELF(.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/configure.ac b/configure.ac index b5d7211..330485f 100644 --- a/configure.ac +++ b/configure.ac @@ -1119,10 +1119,6 @@ AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly i "eor x0, x0, x30, ror #12;\n\t" "add x0, x0, x30, asr #12;\n\t" "eor v0.16b, v0.16b, v31.16b;\n\t" - - /* Test if '.type' and '.size' are supported. 
*/ - ".size asmfunc,.-asmfunc;\n\t" - ".type asmfunc, at function;\n\t" );]])], [gcry_cv_gcc_aarch64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then -- 2.7.4 From martin at martin.st Thu Mar 22 22:32:39 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 23:32:39 +0200 Subject: [PATCH 4/5] aarch64: camellia: Only use the lower 32 bit of an int parameter In-Reply-To: <1521754360-5806-1-git-send-email-martin@martin.st> References: <1521754360-5806-1-git-send-email-martin@martin.st> Message-ID: <1521754360-5806-4-git-send-email-martin@martin.st> * cipher/camellia-aarch64.S: Use 'w3' instead of 'x3'. -- The keybits parameter is declared as int, and in those cases, the upper half of a register is undefined, not guaranteed to be zero. Signed-off-by: Martin Storsj? --- cipher/camellia-aarch64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 440f69f..68d2a7d 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -33,7 +33,7 @@ #define CTX x0 #define RDST x1 #define RSRC x2 -#define RKEYBITS x3 +#define RKEYBITS w3 #define RTAB1 x4 #define RTAB2 x5 -- 2.7.4 From martin at martin.st Thu Mar 22 22:32:37 2018 From: martin at martin.st (=?UTF-8?q?Martin=20Storsj=C3=B6?=) Date: Thu, 22 Mar 2018 23:32:37 +0200 Subject: [PATCH 2/5] aarch64: mpi: Fix building the mpi aarch64 assembly for windows In-Reply-To: <1521754360-5806-1-git-send-email-martin@martin.st> References: <1521754360-5806-1-git-send-email-martin@martin.st> Message-ID: <1521754360-5806-2-git-send-email-martin@martin.st> * mpi/aarch64/mpih-add1.S: Use ELF macro. * mpi/aarch64/mpih-mul1.S: Use ELF macro. * mpi/aarch64/mpih-mul2.S: Use ELF macro. * mpi/aarch64/mpih-mul3.S: Use ELF macro. * mpi/aarch64/mpih-sub1.S: Use ELF macro. * mpi/asm-common-aarch64.h: New. -- The mpi aarch64 assembly is enabled as soon as the compiler supports inline assembly, without checking for .type and .size, as is done for the rest of the assembly in cipher/*.S. (The .type and .size directives are only supported on ELF.) Signed-off-by: Martin Storsj? 
--- mpi/aarch64/mpih-add1.S | 5 +++-- mpi/aarch64/mpih-mul1.S | 5 +++-- mpi/aarch64/mpih-mul2.S | 5 +++-- mpi/aarch64/mpih-mul3.S | 5 +++-- mpi/aarch64/mpih-sub1.S | 5 +++-- mpi/asm-common-aarch64.h | 30 ++++++++++++++++++++++++++++++ 6 files changed, 45 insertions(+), 10 deletions(-) create mode 100644 mpi/asm-common-aarch64.h diff --git a/mpi/aarch64/mpih-add1.S b/mpi/aarch64/mpih-add1.S index fa8cd01..4ead1c2 100644 --- a/mpi/aarch64/mpih-add1.S +++ b/mpi/aarch64/mpih-add1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_add_n -.type _gcry_mpih_add_n,%function +ELF(.type _gcry_mpih_add_n,%function) _gcry_mpih_add_n: and x5, x3, #3; adds xzr, xzr, xzr; /* clear carry flag */ @@ -68,4 +69,4 @@ _gcry_mpih_add_n: .Lend: adc x0, xzr, xzr; ret; -.size _gcry_mpih_add_n,.-_gcry_mpih_add_n; +ELF(.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;) diff --git a/mpi/aarch64/mpih-mul1.S b/mpi/aarch64/mpih-mul1.S index 65e98fe..8a86269 100644 --- a/mpi/aarch64/mpih-mul1.S +++ b/mpi/aarch64/mpih-mul1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_mul_1 -.type _gcry_mpih_mul_1,%function +ELF(.type _gcry_mpih_mul_1,%function) _gcry_mpih_mul_1: and x5, x2, #3; mov x4, xzr; @@ -93,4 +94,4 @@ _gcry_mpih_mul_1: .Lend: mov x0, x4; ret; -.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1; +ELF(.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;) diff --git a/mpi/aarch64/mpih-mul2.S b/mpi/aarch64/mpih-mul2.S index bd3b2c9..c7c08e5 100644 --- a/mpi/aarch64/mpih-mul2.S +++ b/mpi/aarch64/mpih-mul2.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_addmul_1 -.type _gcry_mpih_addmul_1,%function +ELF(.type _gcry_mpih_addmul_1,%function) _gcry_mpih_addmul_1: and x5, x2, #3; mov x6, xzr; @@ -105,4 +106,4 @@ _gcry_mpih_addmul_1: .Lend: mov x0, x6; ret; -.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1; +ELF(.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;) diff --git a/mpi/aarch64/mpih-mul3.S b/mpi/aarch64/mpih-mul3.S index a58bc53..ccc961e 100644 --- a/mpi/aarch64/mpih-mul3.S +++ b/mpi/aarch64/mpih-mul3.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_submul_1 -.type _gcry_mpih_submul_1,%function +ELF(.type _gcry_mpih_submul_1,%function) _gcry_mpih_submul_1: and x5, x2, #3; mov x7, xzr; @@ -118,4 +119,4 @@ _gcry_mpih_submul_1: .Loop_end: cinc x0, x7, cc; ret; -.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1; +ELF(.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;) diff --git a/mpi/aarch64/mpih-sub1.S b/mpi/aarch64/mpih-sub1.S index cbf2f08..4a66373 100644 --- a/mpi/aarch64/mpih-sub1.S +++ b/mpi/aarch64/mpih-sub1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_sub_n -.type _gcry_mpih_sub_n,%function +ELF(.type _gcry_mpih_sub_n,%function) _gcry_mpih_sub_n: and x5, x3, #3; subs xzr, xzr, xzr; /* prepare carry flag for sub */ @@ -68,4 +69,4 @@ _gcry_mpih_sub_n: .Lend: cset x0, cc; ret; -.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n; +ELF(.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;) diff --git a/mpi/asm-common-aarch64.h b/mpi/asm-common-aarch64.h 
new file mode 100644 index 0000000..1269413 --- /dev/null +++ b/mpi/asm-common-aarch64.h @@ -0,0 +1,30 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsj? + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef MPI_ASM_COMMON_AARCH64_H +#define MPI_ASM_COMMON_AARCH64_H + +#ifdef __ELF__ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* MPI_ASM_COMMON_AARCH64_H */ -- 2.7.4 From martin at martin.st Thu Mar 22 21:54:30 2018 From: martin at martin.st (=?ISO-8859-15?Q?Martin_Storsj=F6?=) Date: Thu, 22 Mar 2018 22:54:30 +0200 (EET) Subject: DCO Message-ID: Libgcrypt Developer's Certificate of Origin. Version 1.0 ========================================================= By making a contribution to the Libgcrypt project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the free software license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate free software license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same free software license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the free software license(s) involved. Signed-off-by: Martin Storsj? -------------- next part -------------- Libgcrypt Developer's Certificate of Origin. Version 1.0 ========================================================= By making a contribution to the Libgcrypt project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the free software license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate free software license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same free software license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. 
(d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the free software license(s) involved. Signed-off-by: Martin Storsjö -------------- next part -------------- A non-text attachment was scrubbed... Name: DCO.sig Type: application/pgp-signature Size: 566 bytes Desc: URL: From martin at martin.st Thu Mar 22 22:05:51 2018 From: martin at martin.st (=?ISO-8859-15?Q?Martin_Storsj=F6?=) Date: Thu, 22 Mar 2018 23:05:51 +0200 (EET) Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: Moikka, On Thu, 22 Mar 2018, Jussi Kivilinna wrote: > On 22.03.2018 10:56, Martin Storsjö wrote: >> This fixes building this file for windows on aarch64. >> >> Signed-off-by: Martin Storsjö > > Thanks for the patches. There are a few generic things that need to be done > before these can be applied: > - Check signing off DCO part in doc/HACKING Thanks - this should be done now, hopefully the signatures should be fine even though I don't have a really proper mail+PGP setup. > - Add ChangeLog entries to beginning of commit log, see existing commit > messages for example. Ok, will try to - I'll resend patches with this taken care of. > I'm interested in testing these patches myself. Can you give some pointers > for how to cross-compile libgcrypt for windows/aarch64? Absolutely. First off, you need llvm-mingw: https://github.com/mstorsjo/llvm-mingw The first step in this consists of building a recent pinned version of LLVM+clang, which takes a nontrivial amount of time (a bit over 30 min on an average machine), but I think a prebuilt version of the recent 6.0.0 release [1] should also be fine. (I can't think of any changes since the 6.0 branch that would be relevant for gcrypt.) If you use a prebuilt clang, you can skip the build-llvm.sh step. After building that, you should have a set of {i686,x86_64,armv7,aarch64}-w64-mingw32- tools just like in normal mingw setups. For these patches, I'm configuring with --host=aarch64-w64-mingw32 and nothing else. In case you're interested in testing building in armv7 mode, you'll run into similar issues as for aarch64 (the ELF-specific .type/.size aren't supported etc), but the build for that target succeeded out of the box so there wasn't quite as much need for patches there. One main caveat to know about windows on armv7 is that it's thumb-only, which can be an issue if there's assembly that isn't ready to be assembled in that mode. If you actually want to try running binaries as well, you can actually get surprisingly far with running things in wine on aarch64 linux. For best results, you need a wine built with a recent clang (>= clang 5.0) with a patch [2] that isn't yet merged in upstream wine. // Martin [1] http://releases.llvm.org/download.html#6.0.0 [2] https://source.winehq.org/patches/data/136484 From jussi.kivilinna at iki.fi Sun Mar 25 16:30:16 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 25 Mar 2018 17:30:16 +0300 Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: Moi, On 22.03.2018 23:05, Martin Storsjö wrote: > Moikka, > > On Thu, 22 Mar 2018, Jussi Kivilinna wrote: > >> On 22.03.2018 10:56, Martin Storsjö
wrote: >>> This fixes building this file for windows on aarch64. >>> >>> Signed-off-by: Martin Storsjö >> >> Thanks for the patches. There are a few generic things that need to be done >> before these can be applied: >> - Check signing off DCO part in doc/HACKING > > Thanks - this should be done now, hopefully the signatures should be fine even though I don't have a really proper mail+PGP setup. > I'd also need to verify the signed DCO, so you would need to export your public key (maybe by sending it to a key-server). >> - Add ChangeLog entries to beginning of commit log, see existing commit >> messages for example. > > Ok, will try to - I'll resend patches with this taken care of. > >> I'm interested in testing these patches myself. Can you give some pointers >> for how to cross-compile libgcrypt for windows/aarch64? > > Absolutely. First off, you need llvm-mingw: https://github.com/mstorsjo/llvm-mingw > > The first step in this consists of building a recent pinned version of LLVM+clang, which takes a nontrivial amount of time (a bit over 30 min on an average machine), but I think a prebuilt version of the recent 6.0.0 release [1] should also be fine. (I can't think of any changes since the 6.0 branch that would be relevant for gcrypt.) If you use a prebuilt clang, you can skip the build-llvm.sh step. > > After building that, you should have a set of {i686,x86_64,armv7,aarch64}-w64-mingw32- tools just like in normal mingw setups. > > For these patches, I'm configuring with --host=aarch64-w64-mingw32 and nothing else. > > In case you're interested in testing building in armv7 mode, you'll run into similar issues as for aarch64 (the ELF-specific .type/.size aren't supported etc), but the build for that target succeeded out of the box so there wasn't quite as much need for patches there. One main caveat to know about windows on armv7 is that it's thumb-only, which can be an issue if there's assembly that isn't ready to be assembled in that mode. > > If you actually want to try running binaries as well, you can actually get surprisingly far with running things in wine on aarch64 linux. For best results, you need a wine built with a recent clang (>= clang 5.0) with a patch [2] that isn't yet merged in upstream wine. Thanks, I managed to build clang/mingw tools and libgcrypt with them. However, I did not have as much luck with wine. Current git upstream builds fine, and I managed to manually apply patch [2]. Yet any attempt to run an aarch64/win64/PE executable on native aarch64 yields some wine assert failure prints and wine refuses to start up :( Even less luck on qemu-aarch64 chroot over x86_64... wine-aarch64 just segfaults. -Jussi > > // Martin > > [1] http://releases.llvm.org/download.html#6.0.0 > [2] https://source.winehq.org/patches/data/136484 > > From martin at martin.st Sun Mar 25 21:13:24 2018 From: martin at martin.st (=?ISO-8859-15?Q?Martin_Storsj=F6?=) Date: Sun, 25 Mar 2018 22:13:24 +0300 (EEST) Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: Hei, On Sun, 25 Mar 2018, Jussi Kivilinna wrote: > Moi, > > On 22.03.2018 23:05, Martin Storsjö wrote: >> Moikka, >> >> On Thu, 22 Mar 2018, Jussi Kivilinna wrote: >> >>> On 22.03.2018 10:56, Martin Storsjö wrote: >>>> This fixes building this file for windows on aarch64. >>>> >>>> Signed-off-by: Martin Storsjö >>> >>> Thanks for the patches.
There are a few generic things that need to be done >>> before these can be applied: >>> - Check signing off DCO part in doc/HACKING >> >> Thanks - this should be done now, hopefully the signatures should be fine even though I don't have a really proper mail+PGP setup. >> > > I'd also need to verify the signed DCO, so you would need to export > your public key (maybe by sending it to a key-server). Ah, right - that should be done now, and I'm finding it on a few public search interfaces on the web. > Thanks, I managed to build clang/mingw tools and libgcrypt with them. Ok, that's great. > However, I did not have as much luck with wine. Current git upstream > builds fine, and I managed to manually apply patch [2]. Yet any attempt > to run an aarch64/win64/PE executable on native aarch64 yields some wine > assert failure prints and wine refuses to start up :( Even less luck on > qemu-aarch64 chroot over x86_64... wine-aarch64 just segfaults. Hmm, that doesn't sound too promising. Does it work on its own if you run e.g. "wine64 cmd", which doesn't load external binaries? I have a few other patches locally as well that haven't been merged upstream, which are probably also needed: - https://source.winehq.org/patches/data/135165 - https://source.winehq.org/patches/data/137759 These relate to the use of the x18 register. To have things working really properly, you'd need to use a linux distribution that is built completely with the -ffixed-x18 flag, to make the compiler avoid touching the x18 register even though it's normally allowed on linux. Since you probably don't have that (and neither do I), the first of the two patches adds this flag while building wine (which in itself only helps as long as wine doesn't call out to other libraries like glibc), and the second one tries to restore x18 on calls to public functions, in case e.g. glibc actually had clobbered it. That works for calls to public functions, but not e.g. if wine does a callback to user code, or for COM interfaces. These aren't enough to guarantee that things will work though (only a full distribution built with -ffixed-x18 would do), so they aren't merged upstream. Despite that, for me they are enough to have a lot of code working. (Code built with MSVC will use x18 a lot, code built with mingw libraries and compiler-rt uses it a bit less.) For your case, even though the external binaries themselves might not rely on x18 all that much, wine and glibc themselves might, and might be broken by the few places where wine unconditionally sets the register. Hopefully these two patches fix the issue you are running into. // Martin From jussi.kivilinna at iki.fi Sun Mar 25 22:17:49 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 25 Mar 2018 23:17:49 +0300 Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: References: <1521708975-30902-1-git-send-email-martin@martin.st> Message-ID: <1a026668-f30a-578c-06a0-fefc260e9e06@iki.fi> On 25.03.2018 22:13, Martin Storsjö wrote: > Hei, > > On Sun, 25 Mar 2018, Jussi Kivilinna wrote: > >> Moi, >> >> On 22.03.2018 23:05, Martin Storsjö wrote: >>> Moikka, >>> >>> On Thu, 22 Mar 2018, Jussi Kivilinna wrote: >>> >>>> On 22.03.2018 10:56, Martin Storsjö wrote: >>>>> This fixes building this file for windows on aarch64. >>>>> >>>>> Signed-off-by: Martin Storsjö >>>> >>>> Thanks for the patches.
There are a few generic things that need to be done >>>> before these can be applied: >>>> - Check signing off DCO part in doc/HACKING >>> >>> Thanks - this should be done now, hopefully the signatures should be fine even though I don't have a really proper mail+PGP setup. >>> >> >> I'd also need to verify the signed DCO, so you would need to export >> your public key (maybe by sending it to a key-server). > > Ah, right - that should be done now, and I'm finding it on a few public search interfaces on the web. > It appears that somewhere along the way the newlines on your DCO changed from Unix format to DOS, and verifying DCO+DCO.sig fails: $ gpg --verify DCO.sig DCO gpg: Signature made to 22. maaliskuuta 2018 22.53.22 EET gpg: using RSA key 0x2F9B2688742ACF25 gpg: BAD signature from "Martin Storsjö " [unknown] When the newlines are converted back to Unix format, the signature matches: $ dos2unix < DCO | gpg --verify DCO.sig - gpg: Signature made to 22. maaliskuuta 2018 22.53.22 EET gpg: using RSA key 0x2F9B2688742ACF25 gpg: Good signature from "Martin Storsjö " [unknown] Which is good enough for me, but what do others think? Werner? -Jussi From martin at martin.st Sun Mar 25 22:33:16 2018 From: martin at martin.st (=?ISO-8859-15?Q?Martin_Storsj=F6?=) Date: Sun, 25 Mar 2018 23:33:16 +0300 (EEST) Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: <1a026668-f30a-578c-06a0-fefc260e9e06@iki.fi> References: <1521708975-30902-1-git-send-email-martin@martin.st> <1a026668-f30a-578c-06a0-fefc260e9e06@iki.fi> Message-ID: On Sun, 25 Mar 2018, Jussi Kivilinna wrote: > On 25.03.2018 22:13, Martin Storsjö wrote: >> Hei, >> >> On Sun, 25 Mar 2018, Jussi Kivilinna wrote: >> >>> Moi, >>> >>> On 22.03.2018 23:05, Martin Storsjö wrote: >>>> Moikka, >>>> >>>> On Thu, 22 Mar 2018, Jussi Kivilinna wrote: >>>> >>>>> On 22.03.2018 10:56, Martin Storsjö wrote: >>>>>> This fixes building this file for windows on aarch64. >>>>>> >>>>>> Signed-off-by: Martin Storsjö >>>>> >>>>> Thanks for the patches. There are a few generic things that need to be done >>>>> before these can be applied: >>>>> - Check signing off DCO part in doc/HACKING >>>> >>>> Thanks - this should be done now, hopefully the signatures should be fine even though I don't have a really proper mail+PGP setup. >>>> >>> >>> I'd also need to verify the signed DCO, so you would need to export >>> your public key (maybe by sending it to a key-server). >> >> Ah, right - that should be done now, and I'm finding it on a few public search interfaces on the web. >> > It appears that somewhere along the way the newlines on your DCO changed from Unix format to DOS, and verifying DCO+DCO.sig fails: > $ gpg --verify DCO.sig DCO > gpg: Signature made to 22. maaliskuuta 2018 22.53.22 EET > gpg: using RSA key 0x2F9B2688742ACF25 > gpg: BAD signature from "Martin Storsjö " [unknown] > > When the newlines are converted back to Unix format, the signature matches: > > $ dos2unix < DCO | gpg --verify DCO.sig - > gpg: Signature made to 22. maaliskuuta 2018 22.53.22 EET > gpg: using RSA key 0x2F9B2688742ACF25 > gpg: Good signature from "Martin Storsjö " [unknown] > > Which is good enough for me, but what do others think? Werner? Would it make things better to resend it with the signed attachment originally with DOS newlines (or fixing whatever changed them and resending), or does that make my mess even bigger?
// Martin From martin at martin.st Tue Mar 27 22:49:57 2018 From: martin at martin.st (=?UTF-8?Q?Martin_Storsj=c3=b6?=) Date: Tue, 27 Mar 2018 23:49:57 +0300 Subject: DCO (try 2) Message-ID: Libgcrypt Developer's Certificate of Origin. Version 1.0 ========================================================= By making a contribution to the Libgcrypt project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the free software license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate free software license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same free software license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the free software license(s) involved. Signed-off-by: Martin Storsj? From martin at martin.st Wed Mar 28 13:54:23 2018 From: martin at martin.st (=?UTF-8?Q?Martin_Storsj=c3=b6?=) Date: Wed, 28 Mar 2018 14:54:23 +0300 Subject: DCO (try 3) Message-ID: Libgcrypt Developer's Certificate of Origin. Version 1.0 ========================================================= By making a contribution to the Libgcrypt project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the free software license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate free software license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same free software license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the free software license(s) involved. Signed-off-by: Martin Storsj? -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 833 bytes Desc: OpenPGP digital signature URL: From jussi.kivilinna at iki.fi Wed Mar 28 18:18:42 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Wed, 28 Mar 2018 19:18:42 +0300 Subject: DCO (try 3) In-Reply-To: References: Message-ID: <4ab5ed5d-da37-a175-4ccd-79918b6d4f41@iki.fi> On 28.03.2018 14:54, Martin Storsj? wrote: > Libgcrypt Developer's Certificate of Origin. Version 1.0 > ========================================================= > Thanks. -Jussi -------------- next part -------------- A non-text attachment was scrubbed... 
Name: signature.asc Type: application/pgp-signature Size: 638 bytes Desc: OpenPGP digital signature URL: From jussi.kivilinna at iki.fi Wed Mar 28 19:53:17 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Wed, 28 Mar 2018 20:53:17 +0300 Subject: [PATCH 1/2] poly1305: silence compiler warning on clang/aarch64 Message-ID: <152225959773.17658.6096975446990782871.stgit@localhost.localdomain> * cipher/poly1305.c (MUL_MOD_1305_64): cast zero constant to 64-bits. -- This patch fixes "value size does not match register size specified by the constraint and modifier [-Wasm-operand-widths]" warnings when building with clang/aarch64. Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/cipher/poly1305.c b/cipher/poly1305.c index 68d9b9015..571f82862 100644 --- a/cipher/poly1305.c +++ b/cipher/poly1305.c @@ -130,7 +130,7 @@ static void poly1305_init (poly1305_context_t *ctx, /* carry propagation */ \ H2 = H0 & 3; \ H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \ - ADD_1305_64(H2, H1, H0, 0, x0_hi, x0_lo); \ + ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \ } while (0) unsigned int From jussi.kivilinna at iki.fi Wed Mar 28 19:53:22 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Wed, 28 Mar 2018 20:53:22 +0300 Subject: [PATCH 2/2] aarch64/assembly: only use the lower 32 bit of an int parameters In-Reply-To: <152225959773.17658.6096975446990782871.stgit@localhost.localdomain> References: <152225959773.17658.6096975446990782871.stgit@localhost.localdomain> Message-ID: <152225960280.17658.4754419425065114668.stgit@localhost.localdomain> * cipher/camellia-aarch64.S (_gcry_camellia_arm_encrypt_block) (__gcry_camellia_arm_decrypt_block): Make comment section about input registers match usage. * cipher/rijndael-armv8-aarch64-ce.S (_gcry_aes_ocb_auth_armv8_ce): Use 'w12' and 'w7' instead of 'x12' and 'x7'. (_gcry_aes_xts_enc_armv8_ce, _gcry_aes_xts_dec_armv8_ce): Fix function prototype in comments. * mpi/aarch64/mpih-add1.S: Use 32-bit registers for 32-bit mpi_size_t parameters. * mpi/aarch64/mpih-mul1.S: Ditto. * mpi/aarch64/mpih-mul2.S: Ditto. * mpi/aarch64/mpih-mul3.S: Ditto. * mpi/aarch64/mpih-sub1.S: Ditto. 
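(Background for the register change, with an illustrative pair rather than lines from the patch: under the AArch64 procedure call standard, a caller passing a 32-bit argument such as mpi_size_t only defines the low 32 bits of the argument register; the upper half may hold garbage. Testing the full 64-bit alias can therefore mis-branch:

	cbz	x2, .Lend	/* wrong: upper 32 bits of x2 are undefined */
	cbz	w2, .Lend	/* right: tests only the defined low half */

Hence the switch from x-register to w-register forms throughout the size-counter handling below.)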
-- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index c3cc463d5..b0e9a0335 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -206,7 +206,7 @@ _gcry_camellia_arm_encrypt_block: * x0: keytable * x1: dst * x2: src - * x3: keybitlen + * w3: keybitlen */ adr RTAB1, _gcry_camellia_arm_tables; @@ -252,7 +252,7 @@ _gcry_camellia_arm_decrypt_block: * x0: keytable * x1: dst * x2: src - * x3: keybitlen + * w3: keybitlen */ adr RTAB1, _gcry_camellia_arm_tables; diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S index 5859557ab..f0012c20a 100644 --- a/cipher/rijndael-armv8-aarch64-ce.S +++ b/cipher/rijndael-armv8-aarch64-ce.S @@ -1157,8 +1157,8 @@ _gcry_aes_ocb_auth_armv8_ce: * w6: nrounds => w7 * w7: blkn => w12 */ - mov x12, x7 - mov x7, x6 + mov w12, w7 + mov w7, w6 mov x6, x5 mov x5, x4 mov x4, x3 @@ -1280,7 +1280,9 @@ ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;) * void _gcry_aes_xts_enc_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, - * unsigned char *tweak, unsigned int nrounds); + * unsigned char *tweak, + * size_t nblocks, + * unsigned int nrounds); */ .align 3 @@ -1417,7 +1419,9 @@ ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;) * void _gcry_aes_xts_dec_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, - * unsigned char *tweak, unsigned int nrounds); + * unsigned char *tweak, + * size_t nblocks, + * unsigned int nrounds); */ .align 3 diff --git a/mpi/aarch64/mpih-add1.S b/mpi/aarch64/mpih-add1.S index 4ead1c23b..3370320e0 100644 --- a/mpi/aarch64/mpih-add1.S +++ b/mpi/aarch64/mpih-add1.S @@ -29,7 +29,7 @@ * _gcry_mpih_add_n( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 * mpi_ptr_t s2_ptr, x2 - * mpi_size_t size) x3 + * mpi_size_t size) w3 */ .text @@ -37,34 +37,34 @@ .globl _gcry_mpih_add_n ELF(.type _gcry_mpih_add_n,%function) _gcry_mpih_add_n: - and x5, x3, #3; + and w5, w3, #3; adds xzr, xzr, xzr; /* clear carry flag */ - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x4, [x1], #8; - sub x3, x3, #1; + sub w3, w3, #1; ldr x11, [x2], #8; - and x5, x3, #3; + and w5, w3, #3; adcs x4, x4, x11; str x4, [x0], #8; - cbz x3, .Lend; - cbnz x5, .Loop; + cbz w3, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x4, x6, [x1], #16; ldp x5, x7, [x2], #16; ldp x8, x10, [x1], #16; ldp x9, x11, [x2], #16; - sub x3, x3, #4; + sub w3, w3, #4; adcs x4, x4, x5; adcs x6, x6, x7; adcs x8, x8, x9; adcs x10, x10, x11; stp x4, x6, [x0], #16; stp x8, x10, [x0], #16; - cbnz x3, .Large_loop; + cbnz w3, .Large_loop; .Lend: adc x0, xzr, xzr; diff --git a/mpi/aarch64/mpih-mul1.S b/mpi/aarch64/mpih-mul1.S index 8a8626936..8830845a7 100644 --- a/mpi/aarch64/mpih-mul1.S +++ b/mpi/aarch64/mpih-mul1.S @@ -28,7 +28,7 @@ * mpi_limb_t * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 - * mpi_size_t s1_size, x2 + * mpi_size_t s1_size, w2 * mpi_limb_t s2_limb) x3 */ @@ -37,27 +37,27 @@ .globl _gcry_mpih_mul_1 ELF(.type _gcry_mpih_mul_1,%function) _gcry_mpih_mul_1: - and x5, x2, #3; + and w5, w2, #3; mov x4, xzr; - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x5, [x1], #8; - sub x2, x2, #1; + sub w2, w2, #1; mul x9, x5, x3; umulh x10, x5, x3; - and x5, x2, #3; + and w5, w2, #3; adds x4, x4, x9; str x4, [x0], #8; adc x4, x10, xzr; - cbz x2, .Lend; - cbnz x5, .Loop; + cbz w2, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x5, x6, [x1]; - sub x2, x2, #4; + sub w2, w2, 
#4; mul x9, x5, x3; ldp x7, x8, [x1, #16]; @@ -89,7 +89,7 @@ _gcry_mpih_mul_1: str x4, [x0], #8; adc x4, x16, xzr; - cbnz x2, .Large_loop; + cbnz w2, .Large_loop; .Lend: mov x0, x4; diff --git a/mpi/aarch64/mpih-mul2.S b/mpi/aarch64/mpih-mul2.S index c7c08e5ab..5d736990e 100644 --- a/mpi/aarch64/mpih-mul2.S +++ b/mpi/aarch64/mpih-mul2.S @@ -28,7 +28,7 @@ * mpi_limb_t * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 - * mpi_size_t s1_size, x2 + * mpi_size_t s1_size, w2 * mpi_limb_t s2_limb) x3 */ @@ -37,11 +37,11 @@ .globl _gcry_mpih_addmul_1 ELF(.type _gcry_mpih_addmul_1,%function) _gcry_mpih_addmul_1: - and x5, x2, #3; + and w5, w2, #3; mov x6, xzr; mov x7, xzr; - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x5, [x1], #8; @@ -49,21 +49,21 @@ _gcry_mpih_addmul_1: mul x12, x5, x3; ldr x4, [x0]; umulh x13, x5, x3; - sub x2, x2, #1; + sub w2, w2, #1; adds x12, x12, x4; - and x5, x2, #3; + and w5, w2, #3; adc x13, x13, x7; adds x12, x12, x6; str x12, [x0], #8; adc x6, x7, x13; - cbz x2, .Lend; - cbnz x5, .Loop; + cbz w2, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x5, x9, [x1], #16; - sub x2, x2, #4; + sub w2, w2, #4; ldp x4, x8, [x0]; mul x12, x5, x3; @@ -101,7 +101,7 @@ _gcry_mpih_addmul_1: str x14, [x0], #8; adc x6, x7, x15; - cbnz x2, .Large_loop; + cbnz w2, .Large_loop; .Lend: mov x0, x6; diff --git a/mpi/aarch64/mpih-mul3.S b/mpi/aarch64/mpih-mul3.S index ccc961e62..f785e5e42 100644 --- a/mpi/aarch64/mpih-mul3.S +++ b/mpi/aarch64/mpih-mul3.S @@ -28,7 +28,7 @@ * mpi_limb_t * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 - * mpi_size_t s1_size, x2 + * mpi_size_t s1_size, w2 * mpi_limb_t s2_limb) x3 */ @@ -37,9 +37,9 @@ .globl _gcry_mpih_submul_1 ELF(.type _gcry_mpih_submul_1,%function) _gcry_mpih_submul_1: - and x5, x2, #3; + and w5, w2, #3; mov x7, xzr; - cbz x5, .Large_loop; + cbz w5, .Large_loop; subs xzr, xzr, xzr; @@ -47,26 +47,26 @@ _gcry_mpih_submul_1: ldr x4, [x1], #8; cinc x7, x7, cc; ldr x5, [x0]; - sub x2, x2, #1; + sub w2, w2, #1; mul x6, x4, x3; subs x5, x5, x7; umulh x4, x4, x3; - and x10, x2, #3; + and w10, w2, #3; cset x7, cc; subs x5, x5, x6; add x7, x7, x4; str x5, [x0], #8; - cbz x2, .Loop_end; - cbnz x10, .Loop; + cbz w2, .Loop_end; + cbnz w10, .Loop; cinc x7, x7, cc; .Large_loop: ldp x4, x8, [x1], #16; - sub x2, x2, #4; + sub w2, w2, #4; ldp x5, x9, [x0]; mul x6, x4, x3; @@ -111,7 +111,7 @@ _gcry_mpih_submul_1: str x9, [x0], #8; cinc x7, x7, cc; - cbnz x2, .Large_loop; + cbnz w2, .Large_loop; mov x0, x7; ret; diff --git a/mpi/aarch64/mpih-sub1.S b/mpi/aarch64/mpih-sub1.S index 4a663732d..45a7b0417 100644 --- a/mpi/aarch64/mpih-sub1.S +++ b/mpi/aarch64/mpih-sub1.S @@ -29,7 +29,7 @@ * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 * mpi_ptr_t s2_ptr, x2 - * mpi_size_t size) x3 + * mpi_size_t size) w3 */ .text @@ -37,34 +37,34 @@ .globl _gcry_mpih_sub_n ELF(.type _gcry_mpih_sub_n,%function) _gcry_mpih_sub_n: - and x5, x3, #3; + and w5, w3, #3; subs xzr, xzr, xzr; /* prepare carry flag for sub */ - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x4, [x1], #8; - sub x3, x3, #1; + sub w3, w3, #1; ldr x11, [x2], #8; - and x5, x3, #3; + and w5, w3, #3; sbcs x4, x4, x11; str x4, [x0], #8; - cbz x3, .Lend; - cbnz x5, .Loop; + cbz w3, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x4, x6, [x1], #16; ldp x5, x7, [x2], #16; ldp x8, x10, [x1], #16; ldp x9, x11, [x2], #16; - sub x3, x3, #4; + sub w3, w3, #4; sbcs x4, x4, x5; sbcs x6, x6, x7; sbcs x8, x8, x9; sbcs x10, x10, x11; stp x4, x6, [x0], #16; stp x8, x10, [x0], 
#16; - cbnz x3, .Large_loop; + cbnz w3, .Large_loop; .Lend: cset x0, cc; From jussi.kivilinna at iki.fi Wed Mar 28 20:04:20 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Wed, 28 Mar 2018 21:04:20 +0300 Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: <1521754360-5806-1-git-send-email-martin@martin.st> References: <1521754360-5806-1-git-send-email-martin@martin.st> Message-ID: <22b0c05a-6753-2246-6640-6379f404ca53@iki.fi> Hello, On 22.03.2018 23:32, Martin Storsjö wrote: > * random/rndw32.c: Change _WIN64 ifdef into __x86_64__. > -- > > This fixes building this file for windows on aarch64. > Thanks, all patches applied and pushed. -Jussi From martin at martin.st Wed Mar 28 21:37:43 2018 From: martin at martin.st (=?ISO-8859-15?Q?Martin_Storsj=F6?=) Date: Wed, 28 Mar 2018 22:37:43 +0300 (EEST) Subject: [PATCH 1/5] random: Don't assume that _WIN64 implies x86_64 In-Reply-To: <22b0c05a-6753-2246-6640-6379f404ca53@iki.fi> References: <1521754360-5806-1-git-send-email-martin@martin.st> <22b0c05a-6753-2246-6640-6379f404ca53@iki.fi> Message-ID: On Wed, 28 Mar 2018, Jussi Kivilinna wrote: > Hello, > > On 22.03.2018 23:32, Martin Storsjö wrote: >> * random/rndw32.c: Change _WIN64 ifdef into __x86_64__. >> -- >> >> This fixes building this file for windows on aarch64. >> > > Thanks, all patches applied and pushed. Thanks! // Martin From martin at martin.st Wed Mar 28 21:39:42 2018 From: martin at martin.st (=?ISO-8859-15?Q?Martin_Storsj=F6?=) Date: Wed, 28 Mar 2018 22:39:42 +0300 (EEST) Subject: [PATCH 2/2] aarch64/assembly: only use the lower 32 bit of an int parameters In-Reply-To: <152225960280.17658.4754419425065114668.stgit@localhost.localdomain> References: <152225959773.17658.6096975446990782871.stgit@localhost.localdomain> <152225960280.17658.4754419425065114668.stgit@localhost.localdomain> Message-ID: On Wed, 28 Mar 2018, Jussi Kivilinna wrote: > * cipher/camellia-aarch64.S (_gcry_camellia_arm_encrypt_block) > (__gcry_camellia_arm_decrypt_block): Make comment section about input > registers match usage. > * cipher/rijndael-armv8-aarch64-ce.S (_gcry_aes_ocb_auth_armv8_ce): Use > 'w12' and 'w7' instead of 'x12' and 'x7'. > (_gcry_aes_xts_enc_armv8_ce, _gcry_aes_xts_dec_armv8_ce): Fix function > prototype in comments. > * mpi/aarch64/mpih-add1.S: Use 32-bit registers for 32-bit mpi_size_t > parameters. > * mpi/aarch64/mpih-mul1.S: Ditto. > * mpi/aarch64/mpih-mul2.S: Ditto. > * mpi/aarch64/mpih-mul3.S: Ditto. > * mpi/aarch64/mpih-sub1.S: Ditto. > -- > > Signed-off-by: Jussi Kivilinna > --- Both patches look good to me, for what it's worth. // Martin From cvs at cvs.gnupg.org Wed Mar 28 19:40:21 2018 From: cvs at cvs.gnupg.org (by Martin Storsjö) Date: Wed, 28 Mar 2018 19:40:21 +0200 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-57-g0de2191 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library".
The branch, master has been updated via 0de2191a07d69ef1fa34ca4c5d5fc4985ff7b4c4 (commit) via 4e1b628f492643d4e9b830bcdab7b49daaec5854 (commit) via 36e916fc332eda74963192b1c0bf6860a3e5d67b (commit) via ec0a2f25c0f64a7b65b373508ce9081e10461965 (commit) via ed41d6d6fb4551342b22ef763de1bd60e964e186 (commit) via 8ee38806245ca8452051b1a245f44082323f37f6 (commit) from 885f031fbd17abc1c0fedbb98df22823b647fc11 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 0de2191a07d69ef1fa34ca4c5d5fc4985ff7b4c4 Author: Martin Storsj? Date: Thu Mar 22 23:32:40 2018 +0200 aarch64: Enable building the aarch64 cipher assembly for windows * cipher/asm-common-aarch64.h: New. * cipher/camellia-aarch64.S: Use ELF macro, use x19 instead of x18. * cipher/chacha20-aarch64.S: Use ELF macro, don't use GOT on windows. * cipher/cipher-gcm-armv8-aarch64-ce.S: Use ELF macro. * cipher/rijndael-aarch64.S: Use ELF macro. * cipher/rijndael-armv8-aarch64-ce.S: Use ELF macro. * cipher/sha1-armv8-aarch64-ce.S: Use ELF macro. * cipher/sha256-armv8-aarch64-ce.S: Use ELF macro. * cipher/twofish-aarch64.S: Use ELF macro. * configure.ac: Don't require .size and .type in aarch64 assembly check. -- Don't require .type and .size in configure; we can make them optional via a preprocessor macro. This is mostly a mechanical change, wrapping the .type and .size directives in an ELF() macro, with two actual manual changes: (when targeting windows): - Don't load global symbols via a GOT (in chacha20) - Don't use the x18 register (in camellia); back up and restore x19 in the prologue/epilogue and use that instead. x18 is a platform specific register; on linux, it's free to be used by user code, while it's reserved for platform use on windows and darwin. Always use x19 instead of x18 for consistency. Signed-off-by: Martin Storsj? diff --git a/cipher/asm-common-aarch64.h b/cipher/asm-common-aarch64.h new file mode 100644 index 0000000..814b7ad --- /dev/null +++ b/cipher/asm-common-aarch64.h @@ -0,0 +1,32 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsj? + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef GCRY_ASM_COMMON_AARCH64_H +#define GCRY_ASM_COMMON_AARCH64_H + +#include + +#ifdef __ELF__ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* GCRY_ASM_COMMON_AARCH64_H */ diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 68d2a7d..c3cc463 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -19,7 +19,7 @@ * License along with this program; if not, see . 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -55,12 +55,12 @@ #define RT0 w15 #define RT1 w16 #define RT2 w17 -#define RT3 w18 +#define RT3 w19 #define xRT0 x15 #define xRT1 x16 #define xRT2 x17 -#define xRT3 x18 +#define xRT3 x19 #ifdef __AARCH64EL__ #define host_to_be(reg, rtmp) \ @@ -198,9 +198,10 @@ str_output_be(RDST, YL, YR, XL, XR, RT0, RT1); .globl _gcry_camellia_arm_encrypt_block -.type _gcry_camellia_arm_encrypt_block, at function; +ELF(.type _gcry_camellia_arm_encrypt_block, at function;) _gcry_camellia_arm_encrypt_block: + stp x19, x30, [sp, #-16]! /* input: * x0: keytable * x1: dst @@ -227,6 +228,7 @@ _gcry_camellia_arm_encrypt_block: outunpack(24); + ldp x19, x30, [sp], #16 ret; .ltorg @@ -236,14 +238,16 @@ _gcry_camellia_arm_encrypt_block: outunpack(32); + ldp x19, x30, [sp], #16 ret; .ltorg -.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block; +ELF(.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;) .globl _gcry_camellia_arm_decrypt_block -.type _gcry_camellia_arm_decrypt_block, at function; +ELF(.type _gcry_camellia_arm_decrypt_block, at function;) _gcry_camellia_arm_decrypt_block: + stp x19, x30, [sp, #-16]! /* input: * x0: keytable * x1: dst @@ -271,6 +275,7 @@ _gcry_camellia_arm_decrypt_block: outunpack(0); + ldp x19, x30, [sp], #16 ret; .ltorg @@ -281,11 +286,11 @@ _gcry_camellia_arm_decrypt_block: b .Ldec_128; .ltorg -.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block; +ELF(.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;) /* Encryption/Decryption tables */ .globl _gcry_camellia_arm_tables -.type _gcry_camellia_arm_tables, at object; +ELF(.type _gcry_camellia_arm_tables, at object;) .balign 32 _gcry_camellia_arm_tables: .Lcamellia_sp1110: @@ -551,7 +556,7 @@ _gcry_camellia_arm_tables: .long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4 .long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7 .long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e -.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables; +ELF(.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 5990a08..3844d4e 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -27,7 +27,7 @@ * Public domain. 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -38,9 +38,15 @@ .text +#ifdef _WIN32 +#define GET_DATA_POINTER(reg, name) \ + adrp reg, name ; \ + add reg, reg, #:lo12:name ; +#else #define GET_DATA_POINTER(reg, name) \ adrp reg, :got:name ; \ ldr reg, [reg, #:got_lo12:name] ; +#endif /* register macros */ #define INPUT x0 @@ -148,7 +154,7 @@ chacha20_data: .align 3 .globl _gcry_chacha20_aarch64_blocks4 -.type _gcry_chacha20_aarch64_blocks4,%function; +ELF(.type _gcry_chacha20_aarch64_blocks4,%function;) _gcry_chacha20_aarch64_blocks4: /* input: @@ -303,6 +309,6 @@ _gcry_chacha20_aarch64_blocks4: eor x0, x0, x0 ret -.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4; +ELF(.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4;) #endif diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S index 0cfaf1c..b6c4f59 100644 --- a/cipher/cipher-gcm-armv8-aarch64-ce.S +++ b/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -174,7 +174,7 @@ gcry_gcm_reduction_constant: */ .align 3 .globl _gcry_ghash_armv8_ce_pmull -.type _gcry_ghash_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_armv8_ce_pmull,%function;) _gcry_ghash_armv8_ce_pmull: /* input: * x0: gcm_key @@ -360,7 +360,7 @@ _gcry_ghash_armv8_ce_pmull: .Ldo_nothing: mov x0, #0 ret -.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull; +ELF(.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull;) /* @@ -368,7 +368,7 @@ _gcry_ghash_armv8_ce_pmull: */ .align 3 .globl _gcry_ghash_setup_armv8_ce_pmull -.type _gcry_ghash_setup_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_setup_armv8_ce_pmull,%function;) _gcry_ghash_setup_armv8_ce_pmull: /* input: * x0: gcm_key @@ -408,6 +408,6 @@ _gcry_ghash_setup_armv8_ce_pmull: st1 {rh5.16b-rh6.16b}, [x1] ret -.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull; +ELF(.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull;) #endif diff --git a/cipher/rijndael-aarch64.S b/cipher/rijndael-aarch64.S index e533bbe..aad7487 100644 --- a/cipher/rijndael-aarch64.S +++ b/cipher/rijndael-aarch64.S @@ -18,7 +18,7 @@ * License along with this program; if not, see . 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -206,7 +206,7 @@ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_encrypt_block -.type _gcry_aes_arm_encrypt_block,%function; +ELF(.type _gcry_aes_arm_encrypt_block,%function;) _gcry_aes_arm_encrypt_block: /* input: @@ -285,7 +285,7 @@ _gcry_aes_arm_encrypt_block: lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD); b .Lenc_done; -.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block; +ELF(.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;) #define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \ @@ -429,7 +429,7 @@ _gcry_aes_arm_encrypt_block: addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_decrypt_block -.type _gcry_aes_arm_decrypt_block,%function; +ELF(.type _gcry_aes_arm_decrypt_block,%function;) _gcry_aes_arm_decrypt_block: /* input: @@ -504,7 +504,7 @@ _gcry_aes_arm_decrypt_block: decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); b .Ldec_tail; -.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block; +ELF(.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__ */ diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S index 40097a7..5859557 100644 --- a/cipher/rijndael-armv8-aarch64-ce.S +++ b/cipher/rijndael-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -239,7 +239,7 @@ */ .align 3 .globl _gcry_aes_enc_armv8_ce -.type _gcry_aes_enc_armv8_ce,%function; +ELF(.type _gcry_aes_enc_armv8_ce,%function;) _gcry_aes_enc_armv8_ce: /* input: * x0: keysched @@ -291,7 +291,7 @@ _gcry_aes_enc_armv8_ce: CLEAR_REG(vk13) CLEAR_REG(vk14) b .Lenc1_tail -.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce; +ELF(.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce;) /* @@ -301,7 +301,7 @@ _gcry_aes_enc_armv8_ce: */ .align 3 .globl _gcry_aes_dec_armv8_ce -.type _gcry_aes_dec_armv8_ce,%function; +ELF(.type _gcry_aes_dec_armv8_ce,%function;) _gcry_aes_dec_armv8_ce: /* input: * x0: keysched @@ -353,7 +353,7 @@ _gcry_aes_dec_armv8_ce: CLEAR_REG(vk13) CLEAR_REG(vk14) b .Ldec1_tail -.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce; +ELF(.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce;) /* @@ -366,7 +366,7 @@ _gcry_aes_dec_armv8_ce: .align 3 .globl _gcry_aes_cbc_enc_armv8_ce -.type _gcry_aes_cbc_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_enc_armv8_ce,%function;) _gcry_aes_cbc_enc_armv8_ce: /* input: * x0: keysched @@ -419,7 +419,7 @@ _gcry_aes_cbc_enc_armv8_ce: .Lcbc_enc_skip: ret -.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce; +ELF(.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce;) /* * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched, @@ -430,7 +430,7 @@ _gcry_aes_cbc_enc_armv8_ce: .align 3 .globl _gcry_aes_cbc_dec_armv8_ce -.type _gcry_aes_cbc_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_dec_armv8_ce,%function;) _gcry_aes_cbc_dec_armv8_ce: /* input: * x0: keysched @@ -515,7 +515,7 @@ _gcry_aes_cbc_dec_armv8_ce: .Lcbc_dec_skip: ret -.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce; +ELF(.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce;) /* @@ -527,7 +527,7 @@ 
_gcry_aes_cbc_dec_armv8_ce: .align 3 .globl _gcry_aes_ctr_enc_armv8_ce -.type _gcry_aes_ctr_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ctr_enc_armv8_ce,%function;) _gcry_aes_ctr_enc_armv8_ce: /* input: * r0: keysched @@ -669,7 +669,7 @@ _gcry_aes_ctr_enc_armv8_ce: .Lctr_enc_skip: ret -.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce; +ELF(.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce;) /* @@ -681,7 +681,7 @@ _gcry_aes_ctr_enc_armv8_ce: .align 3 .globl _gcry_aes_cfb_enc_armv8_ce -.type _gcry_aes_cfb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_enc_armv8_ce,%function;) _gcry_aes_cfb_enc_armv8_ce: /* input: * r0: keysched @@ -732,7 +732,7 @@ _gcry_aes_cfb_enc_armv8_ce: .Lcfb_enc_skip: ret -.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce; +ELF(.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce;) /* @@ -744,7 +744,7 @@ _gcry_aes_cfb_enc_armv8_ce: .align 3 .globl _gcry_aes_cfb_dec_armv8_ce -.type _gcry_aes_cfb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_dec_armv8_ce,%function;) _gcry_aes_cfb_dec_armv8_ce: /* input: * r0: keysched @@ -829,7 +829,7 @@ _gcry_aes_cfb_dec_armv8_ce: .Lcfb_dec_skip: ret -.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce; +ELF(.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce;) /* @@ -846,7 +846,7 @@ _gcry_aes_cfb_dec_armv8_ce: .align 3 .globl _gcry_aes_ocb_enc_armv8_ce -.type _gcry_aes_ocb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_enc_armv8_ce,%function;) _gcry_aes_ocb_enc_armv8_ce: /* input: * x0: keysched @@ -979,7 +979,7 @@ _gcry_aes_ocb_enc_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce; +ELF(.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce;) /* @@ -996,7 +996,7 @@ _gcry_aes_ocb_enc_armv8_ce: .align 3 .globl _gcry_aes_ocb_dec_armv8_ce -.type _gcry_aes_ocb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_dec_armv8_ce,%function;) _gcry_aes_ocb_dec_armv8_ce: /* input: * x0: keysched @@ -1129,7 +1129,7 @@ _gcry_aes_ocb_dec_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce; +ELF(.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce;) /* @@ -1145,7 +1145,7 @@ _gcry_aes_ocb_dec_armv8_ce: .align 3 .globl _gcry_aes_ocb_auth_armv8_ce -.type _gcry_aes_ocb_auth_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_auth_armv8_ce,%function;) _gcry_aes_ocb_auth_armv8_ce: /* input: * x0: keysched @@ -1273,7 +1273,7 @@ _gcry_aes_ocb_auth_armv8_ce: CLEAR_REG(v16) ret -.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce; +ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;) /* @@ -1285,7 +1285,7 @@ _gcry_aes_ocb_auth_armv8_ce: .align 3 .globl _gcry_aes_xts_enc_armv8_ce -.type _gcry_aes_xts_enc_armv8_ce,%function; +ELF(.type _gcry_aes_xts_enc_armv8_ce,%function;) _gcry_aes_xts_enc_armv8_ce: /* input: * r0: keysched @@ -1410,7 +1410,7 @@ _gcry_aes_xts_enc_armv8_ce: .Lxts_enc_skip: ret -.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce; +ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;) /* @@ -1422,7 +1422,7 @@ _gcry_aes_xts_enc_armv8_ce: .align 3 .globl _gcry_aes_xts_dec_armv8_ce -.type _gcry_aes_xts_dec_armv8_ce,%function; +ELF(.type _gcry_aes_xts_dec_armv8_ce,%function;) _gcry_aes_xts_dec_armv8_ce: /* input: * r0: keysched @@ -1547,7 +1547,7 @@ _gcry_aes_xts_dec_armv8_ce: .Lxts_dec_skip: ret -.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce; +ELF(.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;) /* @@ 
-1555,7 +1555,7 @@ _gcry_aes_xts_dec_armv8_ce: */ .align 3 .globl _gcry_aes_sbox4_armv8_ce -.type _gcry_aes_sbox4_armv8_ce,%function; +ELF(.type _gcry_aes_sbox4_armv8_ce,%function;) _gcry_aes_sbox4_armv8_ce: /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in * Cryptology - CT-RSA 2015" for details. @@ -1568,7 +1568,7 @@ _gcry_aes_sbox4_armv8_ce: mov w0, v0.S[0] CLEAR_REG(v0) ret -.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce; +ELF(.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;) /* @@ -1576,13 +1576,13 @@ _gcry_aes_sbox4_armv8_ce: */ .align 3 .globl _gcry_aes_invmixcol_armv8_ce -.type _gcry_aes_invmixcol_armv8_ce,%function; +ELF(.type _gcry_aes_invmixcol_armv8_ce,%function;) _gcry_aes_invmixcol_armv8_ce: ld1 {v0.16b}, [x1] aesimc v0.16b, v0.16b st1 {v0.16b}, [x0] CLEAR_REG(v0) ret -.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce; +ELF(.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;) #endif diff --git a/cipher/sha1-armv8-aarch64-ce.S b/cipher/sha1-armv8-aarch64-ce.S index ec1810d..aeb67a1 100644 --- a/cipher/sha1-armv8-aarch64-ce.S +++ b/cipher/sha1-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -103,7 +103,7 @@ gcry_sha1_aarch64_ce_K_VEC: */ .align 3 .globl _gcry_sha1_transform_armv8_ce -.type _gcry_sha1_transform_armv8_ce,%function; +ELF(.type _gcry_sha1_transform_armv8_ce,%function;) _gcry_sha1_transform_armv8_ce: /* input: * x0: ctx, CTX @@ -199,6 +199,6 @@ _gcry_sha1_transform_armv8_ce: .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce; +ELF(.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce;) #endif diff --git a/cipher/sha256-armv8-aarch64-ce.S b/cipher/sha256-armv8-aarch64-ce.S index a4575da..6b3ad32 100644 --- a/cipher/sha256-armv8-aarch64-ce.S +++ b/cipher/sha256-armv8-aarch64-ce.S @@ -17,7 +17,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ @@ -113,7 +113,7 @@ gcry_sha256_aarch64_ce_K: */ .align 3 .globl _gcry_sha256_transform_armv8_ce -.type _gcry_sha256_transform_armv8_ce,%function; +ELF(.type _gcry_sha256_transform_armv8_ce,%function;) _gcry_sha256_transform_armv8_ce: /* input: * r0: ctx, CTX @@ -213,6 +213,6 @@ _gcry_sha256_transform_armv8_ce: .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce; +ELF(.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce;) #endif diff --git a/cipher/twofish-aarch64.S b/cipher/twofish-aarch64.S index 99c4675..adee412 100644 --- a/cipher/twofish-aarch64.S +++ b/cipher/twofish-aarch64.S @@ -18,7 +18,7 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS @@ -217,7 +217,7 @@ ror1(RD); .globl _gcry_twofish_arm_encrypt_block -.type _gcry_twofish_arm_encrypt_block,%function; +ELF(.type _gcry_twofish_arm_encrypt_block,%function;) _gcry_twofish_arm_encrypt_block: /* input: @@ -263,10 +263,10 @@ _gcry_twofish_arm_encrypt_block: ret; .ltorg -.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block; +ELF(.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;) .globl _gcry_twofish_arm_decrypt_block -.type _gcry_twofish_arm_decrypt_block,%function; +ELF(.type _gcry_twofish_arm_decrypt_block,%function;) _gcry_twofish_arm_decrypt_block: /* input: @@ -311,7 +311,7 @@ _gcry_twofish_arm_decrypt_block: str_output_le(RDST, RA, RB, RC, RD, RT0, RT1); ret; -.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block; +ELF(.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/configure.ac b/configure.ac index b5d7211..330485f 100644 --- a/configure.ac +++ b/configure.ac @@ -1119,10 +1119,6 @@ AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly i "eor x0, x0, x30, ror #12;\n\t" "add x0, x0, x30, asr #12;\n\t" "eor v0.16b, v0.16b, v31.16b;\n\t" - - /* Test if '.type' and '.size' are supported. */ - ".size asmfunc,.-asmfunc;\n\t" - ".type asmfunc,@function;\n\t" );]])], [gcry_cv_gcc_aarch64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then commit 4e1b628f492643d4e9b830bcdab7b49daaec5854 Author: Martin Storsjö Date: Thu Mar 22 23:32:39 2018 +0200 aarch64: camellia: Only use the lower 32 bit of an int parameter * cipher/camellia-aarch64.S: Use 'w3' instead of 'x3'. -- The keybits parameter is declared as int, and in those cases, the upper half of a register is undefined, not guaranteed to be zero. Signed-off-by: Martin Storsjö diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 440f69f..68d2a7d 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -33,7 +33,7 @@ #define CTX x0 #define RDST x1 #define RSRC x2 -#define RKEYBITS x3 +#define RKEYBITS w3 #define RTAB1 x4 #define RTAB2 x5 commit 36e916fc332eda74963192b1c0bf6860a3e5d67b Author: Martin Storsjö Date: Thu Mar 22 23:32:38 2018 +0200 aarch64: Fix assembling chacha20-aarch64.S with clang/llvm * cipher/chacha20-aarch64.S: Remove superfluous lane counts. -- When referring to a specific lane, one doesn't need to specify the total number of lanes of the register. With GNU binutils, both forms are accepted, while clang/llvm rejects the form with the unnecessary number of lanes. Signed-off-by: Martin Storsjö
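For illustration, the two spellings differ only in whether the vector arrangement is repeated on the indexed operand; a lane reference needs the element size alone. A minimal stand-alone snippet (hypothetical, not taken from the patch) that broadcasts 32-bit lane 2 of v1 into all four lanes of v0:

    dup v0.4s, v1.s[2]     /* element size only: accepted by GNU as and clang */
    dup v0.4s, v1.4s[2]    /* redundant lane count: GNU as accepts, clang/llvm rejects */

Both lines encode the same DUP (element) instruction, which is why dropping the lane count is a pure syntax change with no effect on the generated code.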
diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 739ddde..5990a08 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -170,27 +170,27 @@ _gcry_chacha20_aarch64_blocks4: mov ROUND, #20; ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS]; - dup X12.4s, X15.4s[0]; - dup X13.4s, X15.4s[1]; + dup X12.4s, X15.s[0]; + dup X13.4s, X15.s[1]; ldr CTR, [INPUT_CTR]; add X12.4s, X12.4s, VCTR.4s; - dup X0.4s, VTMP1.4s[0]; - dup X1.4s, VTMP1.4s[1]; - dup X2.4s, VTMP1.4s[2]; - dup X3.4s, VTMP1.4s[3]; - dup X14.4s, X15.4s[2]; + dup X0.4s, VTMP1.s[0]; + dup X1.4s, VTMP1.s[1]; + dup X2.4s, VTMP1.s[2]; + dup X3.4s, VTMP1.s[3]; + dup X14.4s, X15.s[2]; cmhi VTMP0.4s, VCTR.4s, X12.4s; - dup X15.4s, X15.4s[3]; + dup X15.4s, X15.s[3]; add CTR, CTR, #4; /* Update counter */ - dup X4.4s, VTMP2.4s[0]; - dup X5.4s, VTMP2.4s[1]; - dup X6.4s, VTMP2.4s[2]; - dup X7.4s, VTMP2.4s[3]; + dup X4.4s, VTMP2.s[0]; + dup X5.4s, VTMP2.s[1]; + dup X6.4s, VTMP2.s[2]; + dup X7.4s, VTMP2.s[3]; sub X13.4s, X13.4s, VTMP0.4s; - dup X8.4s, VTMP3.4s[0]; - dup X9.4s, VTMP3.4s[1]; - dup X10.4s, VTMP3.4s[2]; - dup X11.4s, VTMP3.4s[3]; + dup X8.4s, VTMP3.s[0]; + dup X9.4s, VTMP3.s[1]; + dup X10.4s, VTMP3.s[2]; + dup X11.4s, VTMP3.s[3]; mov X12_TMP.16b, X12.16b; mov X13_TMP.16b, X13.16b; str CTR, [INPUT_CTR]; @@ -208,19 +208,19 @@ _gcry_chacha20_aarch64_blocks4: PLUS(X12, X12_TMP); /* INPUT + 12 * 4 + counter */ PLUS(X13, X13_TMP); /* INPUT + 13 * 4 + counter */ - dup VTMP2.4s, VTMP0.4s[0]; /* INPUT + 0 * 4 */ - dup VTMP3.4s, VTMP0.4s[1]; /* INPUT + 1 * 4 */ - dup X12_TMP.4s, VTMP0.4s[2]; /* INPUT + 2 * 4 */ - dup X13_TMP.4s, VTMP0.4s[3]; /* INPUT + 3 * 4 */ + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */ PLUS(X0, VTMP2); PLUS(X1, VTMP3); PLUS(X2, X12_TMP); PLUS(X3, X13_TMP); - dup VTMP2.4s, VTMP1.4s[0]; /* INPUT + 4 * 4 */ - dup VTMP3.4s, VTMP1.4s[1]; /* INPUT + 5 * 4 */ - dup X12_TMP.4s, VTMP1.4s[2]; /* INPUT + 6 * 4 */ - dup X13_TMP.4s, VTMP1.4s[3]; /* INPUT + 7 * 4 */ + dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */ + dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */ + dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */ + dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */ ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS]; mov INPUT_POS, INPUT; PLUS(X4, VTMP2); @@ -228,12 +228,12 @@ PLUS(X6, X12_TMP); PLUS(X7, X13_TMP); - dup VTMP2.4s, VTMP0.4s[0]; /* INPUT + 8 * 4 */ - dup VTMP3.4s, VTMP0.4s[1]; /* INPUT + 9 * 4 */ - dup X12_TMP.4s, VTMP0.4s[2]; /* INPUT + 10 * 4 */ - dup X13_TMP.4s, VTMP0.4s[3]; /* INPUT + 11 * 4 */ - dup VTMP0.4s, VTMP1.4s[2]; /* INPUT + 14 * 4 */ - dup VTMP1.4s, VTMP1.4s[3]; /* INPUT + 15 * 4 */ + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */ + dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */ + dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */ PLUS(X8, VTMP2); PLUS(X9, VTMP3); PLUS(X10, X12_TMP); commit ec0a2f25c0f64a7b65b373508ce9081e10461965 Author: Martin Storsjö Date: Thu Mar 22 23:32:37 2018 +0200 aarch64: mpi: Fix building the mpi aarch64 assembly for windows * mpi/aarch64/mpih-add1.S: Use ELF macro. * mpi/aarch64/mpih-mul1.S: Use ELF macro. * mpi/aarch64/mpih-mul2.S: Use ELF macro. * mpi/aarch64/mpih-mul3.S: Use ELF macro. * mpi/aarch64/mpih-sub1.S: Use ELF macro.
* mpi/asm-common-aarch64.h: New. -- The mpi aarch64 assembly is enabled as soon as the compiler supports inline assembly, without checking for .type and .size, as is done for the rest of the assembly in cipher/*.S. (The .type and .size directives are only supported on ELF.) Signed-off-by: Martin Storsjö diff --git a/mpi/aarch64/mpih-add1.S b/mpi/aarch64/mpih-add1.S index fa8cd01..4ead1c2 100644 --- a/mpi/aarch64/mpih-add1.S +++ b/mpi/aarch64/mpih-add1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_add_n -.type _gcry_mpih_add_n,%function +ELF(.type _gcry_mpih_add_n,%function) _gcry_mpih_add_n: and x5, x3, #3; adds xzr, xzr, xzr; /* clear carry flag */ @@ -68,4 +69,4 @@ _gcry_mpih_add_n: .Lend: adc x0, xzr, xzr; ret; -.size _gcry_mpih_add_n,.-_gcry_mpih_add_n; +ELF(.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;) diff --git a/mpi/aarch64/mpih-mul1.S b/mpi/aarch64/mpih-mul1.S index 65e98fe..8a86269 100644 --- a/mpi/aarch64/mpih-mul1.S +++ b/mpi/aarch64/mpih-mul1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_mul_1 -.type _gcry_mpih_mul_1,%function +ELF(.type _gcry_mpih_mul_1,%function) _gcry_mpih_mul_1: and x5, x2, #3; mov x4, xzr; @@ -93,4 +94,4 @@ _gcry_mpih_mul_1: .Lend: mov x0, x4; ret; -.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1; +ELF(.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;) diff --git a/mpi/aarch64/mpih-mul2.S b/mpi/aarch64/mpih-mul2.S index bd3b2c9..c7c08e5 100644 --- a/mpi/aarch64/mpih-mul2.S +++ b/mpi/aarch64/mpih-mul2.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_addmul_1 -.type _gcry_mpih_addmul_1,%function +ELF(.type _gcry_mpih_addmul_1,%function) _gcry_mpih_addmul_1: and x5, x2, #3; mov x6, xzr; @@ -105,4 +106,4 @@ _gcry_mpih_addmul_1: .Lend: mov x0, x6; ret; -.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1; +ELF(.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;) diff --git a/mpi/aarch64/mpih-mul3.S b/mpi/aarch64/mpih-mul3.S index a58bc53..ccc961e 100644 --- a/mpi/aarch64/mpih-mul3.S +++ b/mpi/aarch64/mpih-mul3.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_submul_1 -.type _gcry_mpih_submul_1,%function +ELF(.type _gcry_mpih_submul_1,%function) _gcry_mpih_submul_1: and x5, x2, #3; mov x7, xzr; @@ -118,4 +119,4 @@ _gcry_mpih_submul_1: .Loop_end: cinc x0, x7, cc; ret; -.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1; +ELF(.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;) diff --git a/mpi/aarch64/mpih-sub1.S b/mpi/aarch64/mpih-sub1.S index cbf2f08..4a66373 100644 --- a/mpi/aarch64/mpih-sub1.S +++ b/mpi/aarch64/mpih-sub1.S @@ -22,6 +22,7 @@ #include "sysdep.h" #include "asm-syntax.h" +#include "asm-common-aarch64.h" /******************* * mpi_limb_t @@ -34,7 +35,7 @@ .text .globl _gcry_mpih_sub_n -.type _gcry_mpih_sub_n,%function +ELF(.type _gcry_mpih_sub_n,%function) _gcry_mpih_sub_n: and x5, x3, #3; subs xzr, xzr, xzr; /* prepare carry flag for sub */ @@ -68,4 +69,4 @@ _gcry_mpih_sub_n: .Lend: cset x0, cc; ret; -.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n; +ELF(.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;) diff --git a/mpi/asm-common-aarch64.h b/mpi/asm-common-aarch64.h new file mode 100644
index 0000000..1269413 --- /dev/null +++ b/mpi/asm-common-aarch64.h @@ -0,0 +1,30 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsjö + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPI_ASM_COMMON_AARCH64_H +#define MPI_ASM_COMMON_AARCH64_H + +#ifdef __ELF__ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* MPI_ASM_COMMON_AARCH64_H */ commit ed41d6d6fb4551342b22ef763de1bd60e964e186 Author: Martin Storsjö Date: Thu Mar 22 23:32:36 2018 +0200 random: Don't assume that _WIN64 implies x86_64 * random/rndw32.c: Change _WIN64 ifdef into __x86_64__. -- This fixes building this file for windows on aarch64. Signed-off-by: Martin Storsjö diff --git a/random/rndw32.c b/random/rndw32.c index 7e9ac50..08a8867 100644 --- a/random/rndw32.c +++ b/random/rndw32.c @@ -986,7 +986,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, On AMD64, TSC is always available and intrinsic is provided for accessing it. */ -#ifdef __WIN64__ +#ifdef __x86_64__ { unsigned __int64 aint64; @@ -1024,7 +1024,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, (*add) (&aword, sizeof (aword), origin ); } } -#endif /*__WIN64__*/ +#endif /*__x86_64__*/ } commit 8ee38806245ca8452051b1a245f44082323f37f6 Author: Jussi Kivilinna Date: Wed Mar 28 20:32:56 2018 +0300 Register DCO for Martin Storsjö -- Signed-off-by: Jussi Kivilinna diff --git a/AUTHORS b/AUTHORS index 8c553e6..49ab941 100644 --- a/AUTHORS +++ b/AUTHORS @@ -172,6 +172,9 @@ Jussi Kivilinna Markus Teich 2014-10-08:20141008180509.GA2770 at trolle: +Martin Storsjö +2018-03-28:dc1605ce-a47d-34c5-8851-d9569f9ea5d3 at martin.st: + Mathias L.
Baumann 2017-01-30:07c06d79-0828-b564-d604-fd16c7c86ebe at sociomantic.com: ----------------------------------------------------------------------- Summary of changes: AUTHORS | 3 ++ mpi/ec-ed25519.c => cipher/asm-common-aarch64.h | 29 +++++----- cipher/camellia-aarch64.S | 25 +++++---- cipher/chacha20-aarch64.S | 72 +++++++++++++------------ cipher/cipher-gcm-armv8-aarch64-ce.S | 10 ++-- cipher/rijndael-aarch64.S | 10 ++-- cipher/rijndael-armv8-aarch64-ce.S | 58 ++++++++++---------- cipher/sha1-armv8-aarch64-ce.S | 6 +-- cipher/sha256-armv8-aarch64-ce.S | 6 +-- cipher/twofish-aarch64.S | 10 ++-- configure.ac | 4 -- mpi/aarch64/mpih-add1.S | 5 +- mpi/aarch64/mpih-mul1.S | 5 +- mpi/aarch64/mpih-mul2.S | 5 +- mpi/aarch64/mpih-mul3.S | 5 +- mpi/aarch64/mpih-sub1.S | 5 +- mpi/{ec-internal.h => asm-common-aarch64.h} | 17 +++--- random/rndw32.c | 4 +- 18 files changed, 147 insertions(+), 132 deletions(-) copy mpi/ec-ed25519.c => cipher/asm-common-aarch64.h (68%) copy mpi/{ec-internal.h => asm-common-aarch64.h} (69%) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From cvs at cvs.gnupg.org Thu Mar 29 16:57:04 2018 From: cvs at cvs.gnupg.org (by Jussi Kivilinna) Date: Thu, 29 Mar 2018 16:57:04 +0200 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-59-g9b58e4a Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 9b58e4a03ba3aeff7bae3f40da706977870c9649 (commit) via 8cdb010f04528703a502344e00d52447de12547d (commit) from 0de2191a07d69ef1fa34ca4c5d5fc4985ff7b4c4 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 9b58e4a03ba3aeff7bae3f40da706977870c9649 Author: Jussi Kivilinna Date: Sat Mar 24 17:49:16 2018 +0200 aarch64/assembly: only use the lower 32 bit of an int parameters * cipher/camellia-aarch64.S (_gcry_camellia_arm_encrypt_block) (__gcry_camellia_arm_decrypt_block): Make comment section about input registers match usage. * cipher/rijndael-armv8-aarch64-ce.S (_gcry_aes_ocb_auth_armv8_ce): Use 'w12' and 'w7' instead of 'x12' and 'x7'. (_gcry_aes_xts_enc_armv8_ce, _gcry_aes_xts_dec_armv8_ce): Fix function prototype in comments. * mpi/aarch64/mpih-add1.S: Use 32-bit registers for 32-bit mpi_size_t parameters. * mpi/aarch64/mpih-mul1.S: Ditto. * mpi/aarch64/mpih-mul2.S: Ditto. * mpi/aarch64/mpih-mul3.S: Ditto. * mpi/aarch64/mpih-sub1.S: Ditto. 
-- Signed-off-by: Jussi Kivilinna diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index c3cc463..b0e9a03 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -206,7 +206,7 @@ _gcry_camellia_arm_encrypt_block: * x0: keytable * x1: dst * x2: src - * x3: keybitlen + * w3: keybitlen */ adr RTAB1, _gcry_camellia_arm_tables; @@ -252,7 +252,7 @@ _gcry_camellia_arm_decrypt_block: * x0: keytable * x1: dst * x2: src - * x3: keybitlen + * w3: keybitlen */ adr RTAB1, _gcry_camellia_arm_tables; diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S index 5859557..f0012c2 100644 --- a/cipher/rijndael-armv8-aarch64-ce.S +++ b/cipher/rijndael-armv8-aarch64-ce.S @@ -1157,8 +1157,8 @@ _gcry_aes_ocb_auth_armv8_ce: * w6: nrounds => w7 * w7: blkn => w12 */ - mov x12, x7 - mov x7, x6 + mov w12, w7 + mov w7, w6 mov x6, x5 mov x5, x4 mov x4, x3 @@ -1280,7 +1280,9 @@ ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;) * void _gcry_aes_xts_enc_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, - * unsigned char *tweak, unsigned int nrounds); + * unsigned char *tweak, + * size_t nblocks, + * unsigned int nrounds); */ .align 3 @@ -1417,7 +1419,9 @@ ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;) * void _gcry_aes_xts_dec_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, - * unsigned char *tweak, unsigned int nrounds); + * unsigned char *tweak, + * size_t nblocks, + * unsigned int nrounds); */ .align 3 diff --git a/mpi/aarch64/mpih-add1.S b/mpi/aarch64/mpih-add1.S index 4ead1c2..3370320 100644 --- a/mpi/aarch64/mpih-add1.S +++ b/mpi/aarch64/mpih-add1.S @@ -29,7 +29,7 @@ * _gcry_mpih_add_n( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 * mpi_ptr_t s2_ptr, x2 - * mpi_size_t size) x3 + * mpi_size_t size) w3 */ .text @@ -37,34 +37,34 @@ .globl _gcry_mpih_add_n ELF(.type _gcry_mpih_add_n,%function) _gcry_mpih_add_n: - and x5, x3, #3; + and w5, w3, #3; adds xzr, xzr, xzr; /* clear carry flag */ - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x4, [x1], #8; - sub x3, x3, #1; + sub w3, w3, #1; ldr x11, [x2], #8; - and x5, x3, #3; + and w5, w3, #3; adcs x4, x4, x11; str x4, [x0], #8; - cbz x3, .Lend; - cbnz x5, .Loop; + cbz w3, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x4, x6, [x1], #16; ldp x5, x7, [x2], #16; ldp x8, x10, [x1], #16; ldp x9, x11, [x2], #16; - sub x3, x3, #4; + sub w3, w3, #4; adcs x4, x4, x5; adcs x6, x6, x7; adcs x8, x8, x9; adcs x10, x10, x11; stp x4, x6, [x0], #16; stp x8, x10, [x0], #16; - cbnz x3, .Large_loop; + cbnz w3, .Large_loop; .Lend: adc x0, xzr, xzr; diff --git a/mpi/aarch64/mpih-mul1.S b/mpi/aarch64/mpih-mul1.S index 8a86269..8830845 100644 --- a/mpi/aarch64/mpih-mul1.S +++ b/mpi/aarch64/mpih-mul1.S @@ -28,7 +28,7 @@ * mpi_limb_t * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 - * mpi_size_t s1_size, x2 + * mpi_size_t s1_size, w2 * mpi_limb_t s2_limb) x3 */ @@ -37,27 +37,27 @@ .globl _gcry_mpih_mul_1 ELF(.type _gcry_mpih_mul_1,%function) _gcry_mpih_mul_1: - and x5, x2, #3; + and w5, w2, #3; mov x4, xzr; - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x5, [x1], #8; - sub x2, x2, #1; + sub w2, w2, #1; mul x9, x5, x3; umulh x10, x5, x3; - and x5, x2, #3; + and w5, w2, #3; adds x4, x4, x9; str x4, [x0], #8; adc x4, x10, xzr; - cbz x2, .Lend; - cbnz x5, .Loop; + cbz w2, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x5, x6, [x1]; - sub x2, x2, #4; + sub w2, w2, #4; mul x9, x5, x3; ldp x7, x8, [x1, 
#16]; @@ -89,7 +89,7 @@ _gcry_mpih_mul_1: str x4, [x0], #8; adc x4, x16, xzr; - cbnz x2, .Large_loop; + cbnz w2, .Large_loop; .Lend: mov x0, x4; diff --git a/mpi/aarch64/mpih-mul2.S b/mpi/aarch64/mpih-mul2.S index c7c08e5..5d73699 100644 --- a/mpi/aarch64/mpih-mul2.S +++ b/mpi/aarch64/mpih-mul2.S @@ -28,7 +28,7 @@ * mpi_limb_t * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 - * mpi_size_t s1_size, x2 + * mpi_size_t s1_size, w2 * mpi_limb_t s2_limb) x3 */ @@ -37,11 +37,11 @@ .globl _gcry_mpih_addmul_1 ELF(.type _gcry_mpih_addmul_1,%function) _gcry_mpih_addmul_1: - and x5, x2, #3; + and w5, w2, #3; mov x6, xzr; mov x7, xzr; - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x5, [x1], #8; @@ -49,21 +49,21 @@ _gcry_mpih_addmul_1: mul x12, x5, x3; ldr x4, [x0]; umulh x13, x5, x3; - sub x2, x2, #1; + sub w2, w2, #1; adds x12, x12, x4; - and x5, x2, #3; + and w5, w2, #3; adc x13, x13, x7; adds x12, x12, x6; str x12, [x0], #8; adc x6, x7, x13; - cbz x2, .Lend; - cbnz x5, .Loop; + cbz w2, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x5, x9, [x1], #16; - sub x2, x2, #4; + sub w2, w2, #4; ldp x4, x8, [x0]; mul x12, x5, x3; @@ -101,7 +101,7 @@ _gcry_mpih_addmul_1: str x14, [x0], #8; adc x6, x7, x15; - cbnz x2, .Large_loop; + cbnz w2, .Large_loop; .Lend: mov x0, x6; diff --git a/mpi/aarch64/mpih-mul3.S b/mpi/aarch64/mpih-mul3.S index ccc961e..f785e5e 100644 --- a/mpi/aarch64/mpih-mul3.S +++ b/mpi/aarch64/mpih-mul3.S @@ -28,7 +28,7 @@ * mpi_limb_t * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 - * mpi_size_t s1_size, x2 + * mpi_size_t s1_size, w2 * mpi_limb_t s2_limb) x3 */ @@ -37,9 +37,9 @@ .globl _gcry_mpih_submul_1 ELF(.type _gcry_mpih_submul_1,%function) _gcry_mpih_submul_1: - and x5, x2, #3; + and w5, w2, #3; mov x7, xzr; - cbz x5, .Large_loop; + cbz w5, .Large_loop; subs xzr, xzr, xzr; @@ -47,26 +47,26 @@ _gcry_mpih_submul_1: ldr x4, [x1], #8; cinc x7, x7, cc; ldr x5, [x0]; - sub x2, x2, #1; + sub w2, w2, #1; mul x6, x4, x3; subs x5, x5, x7; umulh x4, x4, x3; - and x10, x2, #3; + and w10, w2, #3; cset x7, cc; subs x5, x5, x6; add x7, x7, x4; str x5, [x0], #8; - cbz x2, .Loop_end; - cbnz x10, .Loop; + cbz w2, .Loop_end; + cbnz w10, .Loop; cinc x7, x7, cc; .Large_loop: ldp x4, x8, [x1], #16; - sub x2, x2, #4; + sub w2, w2, #4; ldp x5, x9, [x0]; mul x6, x4, x3; @@ -111,7 +111,7 @@ _gcry_mpih_submul_1: str x9, [x0], #8; cinc x7, x7, cc; - cbnz x2, .Large_loop; + cbnz w2, .Large_loop; mov x0, x7; ret; diff --git a/mpi/aarch64/mpih-sub1.S b/mpi/aarch64/mpih-sub1.S index 4a66373..45a7b04 100644 --- a/mpi/aarch64/mpih-sub1.S +++ b/mpi/aarch64/mpih-sub1.S @@ -29,7 +29,7 @@ * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, x0 * mpi_ptr_t s1_ptr, x1 * mpi_ptr_t s2_ptr, x2 - * mpi_size_t size) x3 + * mpi_size_t size) w3 */ .text @@ -37,34 +37,34 @@ .globl _gcry_mpih_sub_n ELF(.type _gcry_mpih_sub_n,%function) _gcry_mpih_sub_n: - and x5, x3, #3; + and w5, w3, #3; subs xzr, xzr, xzr; /* prepare carry flag for sub */ - cbz x5, .Large_loop; + cbz w5, .Large_loop; .Loop: ldr x4, [x1], #8; - sub x3, x3, #1; + sub w3, w3, #1; ldr x11, [x2], #8; - and x5, x3, #3; + and w5, w3, #3; sbcs x4, x4, x11; str x4, [x0], #8; - cbz x3, .Lend; - cbnz x5, .Loop; + cbz w3, .Lend; + cbnz w5, .Loop; .Large_loop: ldp x4, x6, [x1], #16; ldp x5, x7, [x2], #16; ldp x8, x10, [x1], #16; ldp x9, x11, [x2], #16; - sub x3, x3, #4; + sub w3, w3, #4; sbcs x4, x4, x5; sbcs x6, x6, x7; sbcs x8, x8, x9; sbcs x10, x10, x11; stp x4, x6, [x0], #16; stp x8, x10, [x0], #16; - cbnz x3, .Large_loop; + cbnz w3, 
.Large_loop; .Lend: cset x0, cc; commit 8cdb010f04528703a502344e00d52447de12547d Author: Jussi Kivilinna Date: Sat Mar 24 17:22:45 2018 +0200 poly1305: silence compiler warning on clang/aarch64 * cipher/poly1305.c (MUL_MOD_1305_64): cast zero constant to 64-bits. -- This patch fixes "value size does not match register size specified by the constraint and modifier [-Wasm-operand-widths]" warnings when building with clang/aarch64. Signed-off-by: Jussi Kivilinna diff --git a/cipher/poly1305.c b/cipher/poly1305.c index 68d9b90..571f828 100644 --- a/cipher/poly1305.c +++ b/cipher/poly1305.c @@ -130,7 +130,7 @@ static void poly1305_init (poly1305_context_t *ctx, /* carry propagation */ \ H2 = H0 & 3; \ H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \ - ADD_1305_64(H2, H1, H0, 0, x0_hi, x0_lo); \ + ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \ } while (0) unsigned int ----------------------------------------------------------------------- Summary of changes: cipher/camellia-aarch64.S | 4 ++-- cipher/poly1305.c | 2 +- cipher/rijndael-armv8-aarch64-ce.S | 12 ++++++++---- mpi/aarch64/mpih-add1.S | 18 +++++++++--------- mpi/aarch64/mpih-mul1.S | 18 +++++++++--------- mpi/aarch64/mpih-mul2.S | 18 +++++++++--------- mpi/aarch64/mpih-mul3.S | 18 +++++++++--------- mpi/aarch64/mpih-sub1.S | 18 +++++++++--------- 8 files changed, 56 insertions(+), 52 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits
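The poly1305 warning fixed above can be reduced to a small stand-alone case. The following C snippet is a hypothetical sketch in the spirit of ADD_1305_64, not the libgcrypt macro itself: binding a plain '0' (an int) to an inline-asm input operand that the template uses as a full 64-bit register makes clang on aarch64 emit "value size does not match register size specified by the constraint and modifier", while casting the constant to a 64-bit type, as the patch does, keeps both GCC and clang quiet.

    #include <stdint.h>

    typedef uint64_t u64;

    /* Add b_lo into the two-limb value a_hi:a_lo; return the low limb
     * and store the high limb through *hi. */
    static inline u64
    add_2limb (u64 *hi, u64 a_hi, u64 a_lo, u64 b_lo)
    {
      u64 h, l;
      __asm__ ("adds %0, %2, %3\n\t"  /* l = a_lo + b_lo, sets carry */
               "adc %1, %4, %5\n\t"   /* h = a_hi + 0 + carry */
               : "=&r" (l), "=r" (h)
               : "r" (a_lo), "r" (b_lo), "r" (a_hi),
                 "r" ((u64)0)         /* a plain 0 here would trigger
                                       * -Wasm-operand-widths on clang */
               : "cc");
      *hi = h;
      return l;
    }

GNU as happily widens the 32-bit operand when allocating the register for %5, which is why the warning only shows up with clang's integrated assembler and why the one-character cast is the whole fix.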