From cvs at cvs.gnupg.org Fri Nov 2 04:18:13 2018 From: cvs at cvs.gnupg.org (by NIIBE Yutaka) Date: Fri, 02 Nov 2018 04:18:13 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-114-gf462868 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via f46286851158878d5041ac5381b2807ecec541eb (commit) from 4a4d4a284ca996df874e2534f8529c1611289943 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit f46286851158878d5041ac5381b2807ecec541eb Author: NIIBE Yutaka Date: Fri Nov 2 12:06:11 2018 +0900 build: Update gpg-error.m4 and libgcrypt.m4. * m4/gpg-error.m4: Update to 2018-11-02. * src/libgrypt.m4: Add AC_MSG_NOTICE. Bump the version date. Signed-off-by: NIIBE Yutaka diff --git a/m4/gpg-error.m4 b/m4/gpg-error.m4 index 0964a26..a9d572f 100644 --- a/m4/gpg-error.m4 +++ b/m4/gpg-error.m4 @@ -9,7 +9,7 @@ # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# -# Last-changed: 2018-10-29 +# Last-changed: 2018-11-02 dnl AM_PATH_GPG_ERROR([MINIMUM-VERSION, @@ -64,15 +64,42 @@ AC_DEFUN([AM_PATH_GPG_ERROR], min_gpg_error_version=ifelse([$1], ,1.33,$1) ok=no - use_gpgrt_config="" - if test "$GPG_ERROR_CONFIG" = "no"; then + if test "$prefix" = NONE ; then + prefix_option_expanded=/usr/local + else + prefix_option_expanded="$prefix" + fi + if test "$exec_prefix" = NONE ; then + exec_prefix_option_expanded=$prefix_option_expanded + else + exec_prefix_option_expanded=$(prefix=$prefix_option_expanded eval echo $exec_prefix) + fi + libdir_option_expanded=$(prefix=$prefix_option_expanded exec_prefix=$exec_prefix_option_expanded eval echo $libdir) + + if test -f $libdir_option_expanded/pkgconfig/gpg-error.pc; then + gpgrt_libdir=$libdir_option_expanded + else + if crt1_path=$(${CC:-cc} -print-file-name=crt1.o 2>/dev/null); then + if possible_libdir=$(cd ${crt1_path%/*} && pwd 2>/dev/null); then + if test -f $possible_libdir/pkgconfig/gpg-error.pc; then + gpgrt_libdir=$possible_libdir + fi + fi + fi + fi + + if test "$GPG_ERROR_CONFIG" = "no" -a -n "$gpgrt_libdir"; then AC_PATH_PROG(GPGRT_CONFIG, gpgrt-config, no) - if test "$GPGRT_CONFIG" != "no"; then - GPGRT_CONFIG="$GPGRT_CONFIG --prefix=$prefix --exec-prefix=$exec_prefix --libdir=$libdir" + if test "$GPGRT_CONFIG" = "no"; then + unset GPGRT_CONFIG + else + GPGRT_CONFIG="$GPGRT_CONFIG --libdir=$gpgrt_libdir" if $GPGRT_CONFIG gpg-error >/dev/null 2>&1; then GPG_ERROR_CONFIG="$GPGRT_CONFIG gpg-error" - use_gpgrt_config=yes + AC_MSG_NOTICE([Use gpgrt-config with $gpgrt_libdir as gpg-error-config]) gpg_error_config_version=`$GPG_ERROR_CONFIG --modversion` + else + unset GPGRT_CONFIG fi fi else @@ -96,13 +123,19 @@ AC_DEFUN([AM_PATH_GPG_ERROR], fi fi fi - if test -z "$GPGRT_CONFIG"; then + if test -z "$GPGRT_CONFIG" -a -n "$gpgrt_libdir"; then if test "$major" -gt 1 -o "$major" -eq 1 -a "$minor" -ge 33; then AC_PATH_PROG(GPGRT_CONFIG, gpgrt-config, no) - if test "$GPGRT_CONFIG" != 
"no"; then - GPGRT_CONFIG="$GPGRT_CONFIG --prefix=$prefix --exec-prefix=$exec_prefix --libdir=$libdir" - GPG_ERROR_CONFIG="$GPGRT_CONFIG gpg-error" - use_gpgrt_config=yes + if test "$GPGRT_CONFIG" = "no"; then + unset GPGRT_CONFIG + else + GPGRT_CONFIG="$GPGRT_CONFIG --libdir=$gpgrt_libdir" + if $GPGRT_CONFIG gpg-error >/dev/null 2>&1; then + GPG_ERROR_CONFIG="$GPGRT_CONFIG gpg-error" + AC_MSG_NOTICE([Use gpgrt-config with $gpgrt_libdir as gpg-error-config]) + else + unset GPGRT_CONFIG + fi fi fi fi @@ -111,7 +144,7 @@ AC_DEFUN([AM_PATH_GPG_ERROR], if test $ok = yes; then GPG_ERROR_CFLAGS=`$GPG_ERROR_CONFIG --cflags` GPG_ERROR_LIBS=`$GPG_ERROR_CONFIG --libs` - if test -z "$use_gpgrt_config"; then + if test -z "$GPGRT_CONFIG"; then GPG_ERROR_MT_CFLAGS=`$GPG_ERROR_CONFIG --mt --cflags 2>/dev/null` GPG_ERROR_MT_LIBS=`$GPG_ERROR_CONFIG --mt --libs 2>/dev/null` else @@ -122,7 +155,7 @@ AC_DEFUN([AM_PATH_GPG_ERROR], fi AC_MSG_RESULT([yes ($gpg_error_config_version)]) ifelse([$2], , :, [$2]) - if test -z "$use_gpgrt_config"; then + if test -z "$GPGRT_CONFIG"; then gpg_error_config_host=`$GPG_ERROR_CONFIG --host 2>/dev/null || echo none` else gpg_error_config_host=`$GPG_ERROR_CONFIG --variable=host 2>/dev/null || echo none` diff --git a/src/libgcrypt.m4 b/src/libgcrypt.m4 index 2646e96..40ea01c 100644 --- a/src/libgcrypt.m4 +++ b/src/libgcrypt.m4 @@ -9,7 +9,7 @@ # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# -# Last-changed: 2018-10-29 +# Last-changed: 2018-11-02 dnl AM_PATH_LIBGCRYPT([MINIMUM-VERSION, @@ -56,6 +56,7 @@ AC_DEFUN([AM_PATH_LIBGCRYPT], if test x"${LIBGCRYPT_CONFIG}" = x -a x"$GPGRT_CONFIG" != x -a "$GPGRT_CONFIG" != "no"; then if $GPGRT_CONFIG libgcrypt --exists; then LIBGCRYPT_CONFIG="$GPGRT_CONFIG libgcrypt" + AC_MSG_NOTICE([Use gpgrt-config as libgcrypt-config]) use_gpgrt_config=yes fi fi ----------------------------------------------------------------------- Summary of changes: m4/gpg-error.m4 | 59 +++++++++++++++++++++++++++++++++++++++++++------------- src/libgcrypt.m4 | 3 ++- 2 files changed, 48 insertions(+), 14 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From cvs at cvs.gnupg.org Fri Nov 2 05:54:56 2018 From: cvs at cvs.gnupg.org (by NIIBE Yutaka) Date: Fri, 02 Nov 2018 05:54:56 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-115-gf739533 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via f7395338d71d4d82180a11707fd6e77787162e24 (commit) from f46286851158878d5041ac5381b2807ecec541eb (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit f7395338d71d4d82180a11707fd6e77787162e24 Author: NIIBE Yutaka Date: Fri Nov 2 13:51:40 2018 +0900 build: Fix GCRYPT_HWF_MODULES. * configure.ac (GCRYPT_HWF_MODULES): Add libgcrypt_la- prefix. -- Before this change "make distcheck" fails because src/.deps/hwf-x86.Plo remains. 
Note that the distclean entry for the file is libgcrypt_la-hwf-x86.Plo. Signed-off-by: NIIBE Yutaka diff --git a/configure.ac b/configure.ac index cdce339..4e4f1f7 100644 --- a/configure.ac +++ b/configure.ac @@ -2588,7 +2588,7 @@ GCRYPT_HWF_MODULES= case "$mpi_cpu_arch" in x86) AC_DEFINE(HAVE_CPU_ARCH_X86, 1, [Defined for the x86 platforms]) - GCRYPT_HWF_MODULES="hwf-x86.lo" + GCRYPT_HWF_MODULES="libgcrypt_la-hwf-x86.lo" ;; alpha) AC_DEFINE(HAVE_CPU_ARCH_ALPHA, 1, [Defined for Alpha platforms]) @@ -2607,11 +2607,11 @@ case "$mpi_cpu_arch" in ;; arm) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM platforms]) - GCRYPT_HWF_MODULES="hwf-arm.lo" + GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo" ;; aarch64) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM AArch64 platforms]) - GCRYPT_HWF_MODULES="hwf-arm.lo" + GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo" ;; esac AC_SUBST([GCRYPT_HWF_MODULES]) ----------------------------------------------------------------------- Summary of changes: configure.ac | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From cvs at cvs.gnupg.org Fri Nov 2 11:39:41 2018 From: cvs at cvs.gnupg.org (by NIIBE Yutaka) Date: Fri, 02 Nov 2018 11:39:41 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-116-ga2e0cb1 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via a2e0cb1542818ad8a71de34ccbf191adab0a0b86 (commit) from f7395338d71d4d82180a11707fd6e77787162e24 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. 
- Log ----------------------------------------------------------------- commit a2e0cb1542818ad8a71de34ccbf191adab0a0b86 Author: NIIBE Yutaka Date: Fri Nov 2 18:54:02 2018 +0900 aarch64: mpi: Distribute the header file as a part of source. * mpi/Makefile.am (EXTRA_libmpi_la_SOURCES): Add asm-common-aarch64.h. -- Fixes-commit: ec0a2f25c0f64a7b65b373508ce9081e10461965 Signed-off-by: NIIBE Yutaka diff --git a/mpi/Makefile.am b/mpi/Makefile.am index 8f39ee7..4a8d888 100644 --- a/mpi/Makefile.am +++ b/mpi/Makefile.am @@ -175,3 +175,4 @@ libmpi_la_SOURCES = longlong.h \ mpih-mul.c \ mpiutil.c \ ec.c ec-internal.h ec-ed25519.c +EXTRA_libmpi_la_SOURCES = asm-common-aarch64.h ----------------------------------------------------------------------- Summary of changes: mpi/Makefile.am | 1 + 1 file changed, 1 insertion(+) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From jussi.kivilinna at iki.fi Sat Nov 3 13:46:40 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sat, 3 Nov 2018 14:46:40 +0200 Subject: [PATCH 1/2] secmem: fix potential memory visibility issue Message-ID: <154124920023.10618.940566958864951210.stgit@localhost.localdomain> * configure.ac (gcry_cv_have_sync_synchronize): New check. * src/secmem.c (pooldesc_s): Make next pointer volatile. (memory_barrier): New. (_gcry_secmem_malloc_internal): Insert memory barrier between pool->next and mainpool.next assigments. (_gcry_private_is_secure): Update comments. -- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/configure.ac b/configure.ac index cdce3394d..36906d51b 100644 --- a/configure.ac +++ b/configure.ac @@ -858,6 +858,21 @@ if test "$gcry_cv_have_builtin_ctz" = "yes" ; then fi +# +# Check for __sync_synchronize intrinsic. 
+# +AC_CACHE_CHECK(for __sync_synchronize, + [gcry_cv_have_sync_synchronize], + [gcry_cv_have_sync_synchronize=no + AC_LINK_IFELSE([AC_LANG_PROGRAM([], + [__sync_synchronize(); return 0;])], + [gcry_cv_have_sync_synchronize=yes])]) +if test "$gcry_cv_have_sync_synchronize" = "yes" ; then + AC_DEFINE(HAVE_SYNC_SYNCHRONIZE, 1, + [Defined if compiler has '__sync_synchronize' intrinsic]) +fi + + # # Check for VLA support (variable length arrays). # diff --git a/src/secmem.c b/src/secmem.c index 3e091d88f..b6f07c54d 100644 --- a/src/secmem.c +++ b/src/secmem.c @@ -64,7 +64,7 @@ typedef struct pooldesc_s { /* A link to the next pool. This is used to connect the overflow * pools. */ - struct pooldesc_s *next; + struct pooldesc_s * volatile next; /* A memory buffer used as allocation pool. */ void *mem; @@ -118,6 +118,29 @@ GPGRT_LOCK_DEFINE (secmem_lock); #define ADDR_TO_BLOCK(addr) \ (memblock_t *) (void *) ((char *) addr - BLOCK_HEAD_SIZE) + +/* Memory barrier */ +static inline void +memory_barrier(void) +{ +#ifdef HAVE_SYNC_SYNCHRONIZE +#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY + asm volatile ("":::"memory"); +#endif + /* Use GCC / clang intrinsic for memory barrier. */ + __sync_synchronize(); +#else + /* Slow portable alternative, implement memory barrier by using mutex. */ + gpgrt_lock_t tmp; + memset (&tmp, 0, sizeof(tmp)); + gpgrt_lock_init (&tmp); + gpgrt_lock_lock (&tmp); + gpgrt_lock_unlock (&tmp); + gpgrt_lock_destroy (&tmp); +#endif +} + + /* Check whether P points into POOL. */ static inline int ptr_into_pool_p (pooldesc_t *pool, const void *p) @@ -520,7 +543,7 @@ _gcry_secmem_get_flags (void) } -/* This function initializes the main memory pool MAINPOOL. Itis +/* This function initializes the main memory pool MAINPOOL. It is * expected to be called with the secmem lock held. 
*/ static void _gcry_secmem_init_internal (size_t n) @@ -670,8 +693,12 @@ _gcry_secmem_malloc_internal (size_t size, int xhint) pool->okay = 1; /* Take care: in _gcry_private_is_secure we do not lock and thus - * we assume that the second assignment below is atomic. */ + * we assume that the second assignment below is atomic. Memory + * barrier prevents reordering of stores to new pool structure after + * MAINPOOL.NEXT assigment and prevents _gcry_private_is_secure seeing + * non-initialized POOL->NEXT pointers. */ pool->next = mainpool.next; + memory_barrier(); mainpool.next = pool; /* After the first time we allocated an overflow pool, print a @@ -811,9 +838,13 @@ _gcry_private_is_secure (const void *p) { pooldesc_t *pool; - /* We do no lock here because once a pool is allocatred it will not - * be removed anymore (except for gcry_secmem_term). Further, - * adding a new pool to the list should be atomic. */ + /* We do no lock here because once a pool is allocated it will not + * be removed anymore (except for gcry_secmem_term). Further, as + * assigment of POOL->NEXT in new pool structure is visible in + * this thread before assigment of MAINPOOL.NEXT, pool list can be + * iterated locklessly. This visiblity is ensured by memory barrier + * between POOL->NEXT and MAINPOOL.NEXT assignments in + * _gcry_secmem_malloc_internal. 
*/ for (pool = &mainpool; pool; pool = pool->next) if (pool->okay && ptr_into_pool_p (pool, p)) return 1; From jussi.kivilinna at iki.fi Sat Nov 3 13:46:45 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sat, 3 Nov 2018 14:46:45 +0200 Subject: [PATCH 2/2] stdmem: free: only call _gcry_secmem_free if needed In-Reply-To: <154124920023.10618.940566958864951210.stgit@localhost.localdomain> References: <154124920023.10618.940566958864951210.stgit@localhost.localdomain> Message-ID: <154124920541.10618.4147602951283158748.stgit@localhost.localdomain> * src/stdmem.c (_gcry_private_free): Check if memory is secure before calling _gcry_secmem_free to avoid unnecessarily taking secmem lock. -- Unnecessarily taking secmem lock on non-secure memory can result poor performance on multi-threaded workloads: https://lists.gnupg.org/pipermail/gcrypt-devel/2018-August/004535.html Reported-by: Christian Grothoff Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/src/stdmem.c b/src/stdmem.c index cbda8d895..04ce64fba 100644 --- a/src/stdmem.c +++ b/src/stdmem.c @@ -224,19 +224,23 @@ void _gcry_private_free (void *a) { unsigned char *p = a; + unsigned char *freep; if (!p) return; - if (use_m_guard ) + if (use_m_guard) { - _gcry_private_check_heap(p); - if (! _gcry_secmem_free (p - EXTRA_ALIGN - 4)) - { - free (p - EXTRA_ALIGN - 4); - } + _gcry_private_check_heap (p); + freep = p - EXTRA_ALIGN - 4; + } + else + { + freep = p; } - else if (!_gcry_secmem_free (p)) + + if (!_gcry_private_is_secure (freep) || + !_gcry_secmem_free (freep)) { - free(p); + free (freep); } } From jussi.kivilinna at iki.fi Sat Nov 3 13:48:31 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sat, 3 Nov 2018 14:48:31 +0200 Subject: [PATCH] wipememory: use memset for non-constant length or large buffer wipes Message-ID: <154124931157.11032.18078485384346817104.stgit@localhost.localdomain> * src/g10lib.h (CONSTANT_P): New. (_gcry_wipememory2): New prototype. 
(wipememory2): Use _gcry_wipememory2 if _len not constant expression or lenght is larger than 64 bytes. (FASTWIPE_T, FASTWIPE_MULT, fast_wipememory2_unaligned_head): Remove. (fast_wipememory2): Always handle buffer as unaligned. * src/misc.c (__gcry_burn_stack): Move memset_ptr variable to... (memset_ptr): ... here. New. (_gcry_wipememory2): New. -- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/src/g10lib.h b/src/g10lib.h index c64cbcf2a..9b2147812 100644 --- a/src/g10lib.h +++ b/src/g10lib.h @@ -76,11 +76,13 @@ #endif #if __GNUC__ >= 3 -#define LIKELY( expr ) __builtin_expect( !!(expr), 1 ) -#define UNLIKELY( expr ) __builtin_expect( !!(expr), 0 ) +#define LIKELY(expr) __builtin_expect( !!(expr), 1 ) +#define UNLIKELY(expr) __builtin_expect( !!(expr), 0 ) +#define CONSTANT_P(expr) __builtin_constant_p( expr ) #else -#define LIKELY( expr ) (!!(expr)) -#define UNLIKELY( expr ) (!!(expr)) +#define LIKELY(expr) (!!(expr)) +#define UNLIKELY(expr) (!!(expr)) +#define CONSTANT_P(expr) (0) #endif /* Gettext macros. */ @@ -334,60 +336,50 @@ void __gcry_burn_stack (unsigned int bytes); /* To avoid that a compiler optimizes certain memset calls away, these - macros may be used instead. */ + macros may be used instead. For small constant length buffers, + memory wiping is inlined. For non-constant or large length buffers, + memory is wiped with memset through _gcry_wipememory. 
*/ +void _gcry_wipememory2(void *ptr, int set, size_t len); #define wipememory2(_ptr,_set,_len) do { \ - volatile char *_vptr=(volatile char *)(_ptr); \ - size_t _vlen=(_len); \ - unsigned char _vset=(_set); \ - fast_wipememory2(_vptr,_vset,_vlen); \ - while(_vlen) { *_vptr=(_vset); _vptr++; _vlen--; } \ - } while(0) + if (!CONSTANT_P(_len) || _len > 64) { \ + _gcry_wipememory2((void *)_ptr, _set, _len); \ + } else {\ + volatile char *_vptr = (volatile char *)(_ptr); \ + size_t _vlen = (_len); \ + const unsigned char _vset = (_set); \ + fast_wipememory2(_vptr, _vset, _vlen); \ + while(_vlen) { *_vptr = (_vset); _vptr++; _vlen--; } \ + } \ + } while(0) #define wipememory(_ptr,_len) wipememory2(_ptr,0,_len) -#define FASTWIPE_T u64 -#define FASTWIPE_MULT (U64_C(0x0101010101010101)) - -/* Following architectures can handle unaligned accesses fast. */ #if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \ defined(HAVE_GCC_ATTRIBUTE_ALIGNED) && \ - defined(HAVE_GCC_ATTRIBUTE_MAY_ALIAS) && \ - (defined(__i386__) || defined(__x86_64__) || \ - defined(__powerpc__) || defined(__powerpc64__) || \ - (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \ - defined(__aarch64__)) -#define fast_wipememory2_unaligned_head(_ptr,_set,_len) /*do nothing*/ + defined(HAVE_GCC_ATTRIBUTE_MAY_ALIAS) typedef struct fast_wipememory_s { - FASTWIPE_T a; + u64 a; } __attribute__((packed, aligned(1), may_alias)) fast_wipememory_t; +/* fast_wipememory may leave tail bytes unhandled, in which case tail bytes + are handled by wipememory. 
*/ +# define fast_wipememory2(_vptr,_vset,_vlen) do { \ + fast_wipememory_t _vset_long; \ + if (_vlen < sizeof(fast_wipememory_t)) \ + break; \ + _vset_long.a = (_vset); \ + _vset_long.a *= U64_C(0x0101010101010101); \ + do { \ + volatile fast_wipememory_t *_vptr_long = \ + (volatile void *)_vptr; \ + _vptr_long->a = _vset_long.a; \ + _vlen -= sizeof(fast_wipememory_t); \ + _vptr += sizeof(fast_wipememory_t); \ + } while (_vlen >= sizeof(fast_wipememory_t)); \ + } while (0) #else -#define fast_wipememory2_unaligned_head(_vptr,_vset,_vlen) do { \ - while(UNLIKELY((size_t)(_vptr)&(sizeof(FASTWIPE_T)-1)) && _vlen) \ - { *_vptr=(_vset); _vptr++; _vlen--; } \ - } while(0) -typedef struct fast_wipememory_s -{ - FASTWIPE_T a; -} fast_wipememory_t; +# define fast_wipememory2(_vptr,_vset,_vlen) #endif -/* fast_wipememory2 may leave tail bytes unhandled, in which case tail bytes - are handled by wipememory2. */ -#define fast_wipememory2(_vptr,_vset,_vlen) do { \ - FASTWIPE_T _vset_long = _vset; \ - fast_wipememory2_unaligned_head(_vptr,_vset,_vlen); \ - if (_vlen < sizeof(FASTWIPE_T)) \ - break; \ - _vset_long *= FASTWIPE_MULT; \ - do { \ - volatile fast_wipememory_t *_vptr_long = \ - (volatile void *)_vptr; \ - _vptr_long->a = _vset_long; \ - _vlen -= sizeof(FASTWIPE_T); \ - _vptr += sizeof(FASTWIPE_T); \ - } while (_vlen >= sizeof(FASTWIPE_T)); \ - } while (0) - /* Digit predicates. 
*/ diff --git a/src/misc.c b/src/misc.c index 47d2dc712..420ce74db 100644 --- a/src/misc.c +++ b/src/misc.c @@ -32,6 +32,8 @@ static int verbosity_level = 0; +static void *(*volatile memset_ptr)(void *, int, size_t) = (void *)memset; + static void (*fatal_error_handler)(void*,int, const char*) = NULL; static void *fatal_error_handler_value = 0; static void (*log_handler)(void*,int, const char*, va_list) = NULL; @@ -497,23 +499,29 @@ _gcry_strtokenize (const char *string, const char *delim) } +void +_gcry_wipememory2 (void *ptr, int set, size_t len) +{ + memset_ptr (ptr, set, len); +} + + void __gcry_burn_stack (unsigned int bytes) { #ifdef HAVE_VLA - static void *(*volatile memset_ptr)(void *, int, size_t) = (void *)memset; - /* (bytes == 0 ? 1 : bytes) == (!bytes + bytes) */ - unsigned int buflen = ((!bytes + bytes) + 63) & ~63; - char buf[buflen]; + /* (bytes == 0 ? 1 : bytes) == (!bytes + bytes) */ + unsigned int buflen = ((!bytes + bytes) + 63) & ~63; + char buf[buflen]; - memset_ptr (buf, 0, sizeof buf); + memset_ptr (buf, 0, buflen); #else - volatile char buf[64]; + volatile char buf[64]; - wipememory (buf, sizeof buf); + wipememory (buf, sizeof buf); - if (bytes > sizeof buf) - _gcry_burn_stack (bytes - sizeof buf); + if (bytes > sizeof buf) + _gcry_burn_stack (bytes - sizeof buf); #endif } From wk at gnupg.org Mon Nov 5 10:56:59 2018 From: wk at gnupg.org (Werner Koch) Date: Mon, 05 Nov 2018 10:56:59 +0100 Subject: [PATCH 1/2] secmem: fix potential memory visibility issue In-Reply-To: <154124920023.10618.940566958864951210.stgit@localhost.localdomain> (Jussi Kivilinna's message of "Sat, 3 Nov 2018 14:46:40 +0200") References: <154124920023.10618.940566958864951210.stgit@localhost.localdomain> Message-ID: <87va5b979w.fsf@wheatstone.g10code.de> Hi! thanks for comments and the memory barrier work. Salam-Shalom, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. -------------- next part -------------- A non-text attachment was scrubbed... 
Name: not available Type: application/pgp-signature Size: 227 bytes Desc: not available URL: From cvs at cvs.gnupg.org Mon Nov 5 19:58:26 2018 From: cvs at cvs.gnupg.org (by Jussi Kivilinna) Date: Mon, 05 Nov 2018 19:58:26 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-123-g23f56d3 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 23f56d3359ca7d152aa87874ddd6305171a91408 (commit) via d6c6680ca31c05bafbb8becda56da051346eceb3 (commit) via 4faeaa1cbd235a2560fa04a8ac3766a07029acd8 (commit) via 0068d41d9304ebcdb2caba1fa8848925e2bfaac7 (commit) via 30e783ec487466132324673f197d36b85a91b060 (commit) via ec49013d23d9a7b874c42d77ceb08bd313ba69e1 (commit) via 2aece89d3967e692743541cea857f2e4771b0b62 (commit) from a2e0cb1542818ad8a71de34ccbf191adab0a0b86 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 23f56d3359ca7d152aa87874ddd6305171a91408 Author: Jussi Kivilinna Date: Mon Nov 5 20:42:58 2018 +0200 stdmem: free: only call _gcry_secmem_free if needed * src/stdmem.c (_gcry_private_free): Check if memory is secure before calling _gcry_secmem_free to avoid unnecessarily taking secmem lock. 
-- Unnecessarily taking secmem lock on non-secure memory can result poor performance on multi-threaded workloads: https://lists.gnupg.org/pipermail/gcrypt-devel/2018-August/004535.html Reported-by: Christian Grothoff Signed-off-by: Jussi Kivilinna diff --git a/src/stdmem.c b/src/stdmem.c index cbda8d8..04ce64f 100644 --- a/src/stdmem.c +++ b/src/stdmem.c @@ -224,19 +224,23 @@ void _gcry_private_free (void *a) { unsigned char *p = a; + unsigned char *freep; if (!p) return; - if (use_m_guard ) + if (use_m_guard) { - _gcry_private_check_heap(p); - if (! _gcry_secmem_free (p - EXTRA_ALIGN - 4)) - { - free (p - EXTRA_ALIGN - 4); - } + _gcry_private_check_heap (p); + freep = p - EXTRA_ALIGN - 4; + } + else + { + freep = p; } - else if (!_gcry_secmem_free (p)) + + if (!_gcry_private_is_secure (freep) || + !_gcry_secmem_free (freep)) { - free(p); + free (freep); } } commit d6c6680ca31c05bafbb8becda56da051346eceb3 Author: Jussi Kivilinna Date: Mon Nov 5 20:42:58 2018 +0200 secmem: fix potential memory visibility issue * configure.ac (gcry_cv_have_sync_synchronize): New check. * src/secmem.c (pooldesc_s): Make next pointer volatile. (memory_barrier): New. (_gcry_secmem_malloc_internal): Insert memory barrier between pool->next and mainpool.next assigments. (_gcry_private_is_secure): Update comments. -- Signed-off-by: Jussi Kivilinna diff --git a/configure.ac b/configure.ac index 4e4f1f7..9803d51 100644 --- a/configure.ac +++ b/configure.ac @@ -859,6 +859,21 @@ fi # +# Check for __sync_synchronize intrinsic. +# +AC_CACHE_CHECK(for __sync_synchronize, + [gcry_cv_have_sync_synchronize], + [gcry_cv_have_sync_synchronize=no + AC_LINK_IFELSE([AC_LANG_PROGRAM([], + [__sync_synchronize(); return 0;])], + [gcry_cv_have_sync_synchronize=yes])]) +if test "$gcry_cv_have_sync_synchronize" = "yes" ; then + AC_DEFINE(HAVE_SYNC_SYNCHRONIZE, 1, + [Defined if compiler has '__sync_synchronize' intrinsic]) +fi + + +# # Check for VLA support (variable length arrays). 
# AC_CACHE_CHECK(whether the variable length arrays are supported, diff --git a/src/secmem.c b/src/secmem.c index 3e091d8..b6f07c5 100644 --- a/src/secmem.c +++ b/src/secmem.c @@ -64,7 +64,7 @@ typedef struct pooldesc_s { /* A link to the next pool. This is used to connect the overflow * pools. */ - struct pooldesc_s *next; + struct pooldesc_s * volatile next; /* A memory buffer used as allocation pool. */ void *mem; @@ -118,6 +118,29 @@ GPGRT_LOCK_DEFINE (secmem_lock); #define ADDR_TO_BLOCK(addr) \ (memblock_t *) (void *) ((char *) addr - BLOCK_HEAD_SIZE) + +/* Memory barrier */ +static inline void +memory_barrier(void) +{ +#ifdef HAVE_SYNC_SYNCHRONIZE +#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY + asm volatile ("":::"memory"); +#endif + /* Use GCC / clang intrinsic for memory barrier. */ + __sync_synchronize(); +#else + /* Slow portable alternative, implement memory barrier by using mutex. */ + gpgrt_lock_t tmp; + memset (&tmp, 0, sizeof(tmp)); + gpgrt_lock_init (&tmp); + gpgrt_lock_lock (&tmp); + gpgrt_lock_unlock (&tmp); + gpgrt_lock_destroy (&tmp); +#endif +} + + /* Check whether P points into POOL. */ static inline int ptr_into_pool_p (pooldesc_t *pool, const void *p) @@ -520,7 +543,7 @@ _gcry_secmem_get_flags (void) } -/* This function initializes the main memory pool MAINPOOL. Itis +/* This function initializes the main memory pool MAINPOOL. It is * expected to be called with the secmem lock held. */ static void _gcry_secmem_init_internal (size_t n) @@ -670,8 +693,12 @@ _gcry_secmem_malloc_internal (size_t size, int xhint) pool->okay = 1; /* Take care: in _gcry_private_is_secure we do not lock and thus - * we assume that the second assignment below is atomic. */ + * we assume that the second assignment below is atomic. Memory + * barrier prevents reordering of stores to new pool structure after + * MAINPOOL.NEXT assigment and prevents _gcry_private_is_secure seeing + * non-initialized POOL->NEXT pointers. 
*/ pool->next = mainpool.next; + memory_barrier(); mainpool.next = pool; /* After the first time we allocated an overflow pool, print a @@ -811,9 +838,13 @@ _gcry_private_is_secure (const void *p) { pooldesc_t *pool; - /* We do no lock here because once a pool is allocatred it will not - * be removed anymore (except for gcry_secmem_term). Further, - * adding a new pool to the list should be atomic. */ + /* We do no lock here because once a pool is allocated it will not + * be removed anymore (except for gcry_secmem_term). Further, as + * assigment of POOL->NEXT in new pool structure is visible in + * this thread before assigment of MAINPOOL.NEXT, pool list can be + * iterated locklessly. This visiblity is ensured by memory barrier + * between POOL->NEXT and MAINPOOL.NEXT assignments in + * _gcry_secmem_malloc_internal. */ for (pool = &mainpool; pool; pool = pool->next) if (pool->okay && ptr_into_pool_p (pool, p)) return 1; commit 4faeaa1cbd235a2560fa04a8ac3766a07029acd8 Author: Jussi Kivilinna Date: Mon Nov 5 20:42:58 2018 +0200 wipememory: use memset for non-constant length or large buffer wipes * src/g10lib.h (CONSTANT_P): New. (_gcry_wipememory2): New prototype. (wipememory2): Use _gcry_wipememory2 if _len not constant expression or lenght is larger than 64 bytes. (FASTWIPE_T, FASTWIPE_MULT, fast_wipememory2_unaligned_head): Remove. (fast_wipememory2): Always handle buffer as unaligned. * src/misc.c (__gcry_burn_stack): Move memset_ptr variable to... (memset_ptr): ... here. New. (_gcry_wipememory2): New. 
-- Signed-off-by: Jussi Kivilinna diff --git a/src/g10lib.h b/src/g10lib.h index c64cbcf..9b21478 100644 --- a/src/g10lib.h +++ b/src/g10lib.h @@ -76,11 +76,13 @@ #endif #if __GNUC__ >= 3 -#define LIKELY( expr ) __builtin_expect( !!(expr), 1 ) -#define UNLIKELY( expr ) __builtin_expect( !!(expr), 0 ) +#define LIKELY(expr) __builtin_expect( !!(expr), 1 ) +#define UNLIKELY(expr) __builtin_expect( !!(expr), 0 ) +#define CONSTANT_P(expr) __builtin_constant_p( expr ) #else -#define LIKELY( expr ) (!!(expr)) -#define UNLIKELY( expr ) (!!(expr)) +#define LIKELY(expr) (!!(expr)) +#define UNLIKELY(expr) (!!(expr)) +#define CONSTANT_P(expr) (0) #endif /* Gettext macros. */ @@ -334,60 +336,50 @@ void __gcry_burn_stack (unsigned int bytes); /* To avoid that a compiler optimizes certain memset calls away, these - macros may be used instead. */ + macros may be used instead. For small constant length buffers, + memory wiping is inlined. For non-constant or large length buffers, + memory is wiped with memset through _gcry_wipememory. */ +void _gcry_wipememory2(void *ptr, int set, size_t len); #define wipememory2(_ptr,_set,_len) do { \ - volatile char *_vptr=(volatile char *)(_ptr); \ - size_t _vlen=(_len); \ - unsigned char _vset=(_set); \ - fast_wipememory2(_vptr,_vset,_vlen); \ - while(_vlen) { *_vptr=(_vset); _vptr++; _vlen--; } \ - } while(0) + if (!CONSTANT_P(_len) || _len > 64) { \ + _gcry_wipememory2((void *)_ptr, _set, _len); \ + } else {\ + volatile char *_vptr = (volatile char *)(_ptr); \ + size_t _vlen = (_len); \ + const unsigned char _vset = (_set); \ + fast_wipememory2(_vptr, _vset, _vlen); \ + while(_vlen) { *_vptr = (_vset); _vptr++; _vlen--; } \ + } \ + } while(0) #define wipememory(_ptr,_len) wipememory2(_ptr,0,_len) -#define FASTWIPE_T u64 -#define FASTWIPE_MULT (U64_C(0x0101010101010101)) - -/* Following architectures can handle unaligned accesses fast. 
*/ #if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \ defined(HAVE_GCC_ATTRIBUTE_ALIGNED) && \ - defined(HAVE_GCC_ATTRIBUTE_MAY_ALIAS) && \ - (defined(__i386__) || defined(__x86_64__) || \ - defined(__powerpc__) || defined(__powerpc64__) || \ - (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \ - defined(__aarch64__)) -#define fast_wipememory2_unaligned_head(_ptr,_set,_len) /*do nothing*/ + defined(HAVE_GCC_ATTRIBUTE_MAY_ALIAS) typedef struct fast_wipememory_s { - FASTWIPE_T a; + u64 a; } __attribute__((packed, aligned(1), may_alias)) fast_wipememory_t; +/* fast_wipememory may leave tail bytes unhandled, in which case tail bytes + are handled by wipememory. */ +# define fast_wipememory2(_vptr,_vset,_vlen) do { \ + fast_wipememory_t _vset_long; \ + if (_vlen < sizeof(fast_wipememory_t)) \ + break; \ + _vset_long.a = (_vset); \ + _vset_long.a *= U64_C(0x0101010101010101); \ + do { \ + volatile fast_wipememory_t *_vptr_long = \ + (volatile void *)_vptr; \ + _vptr_long->a = _vset_long.a; \ + _vlen -= sizeof(fast_wipememory_t); \ + _vptr += sizeof(fast_wipememory_t); \ + } while (_vlen >= sizeof(fast_wipememory_t)); \ + } while (0) #else -#define fast_wipememory2_unaligned_head(_vptr,_vset,_vlen) do { \ - while(UNLIKELY((size_t)(_vptr)&(sizeof(FASTWIPE_T)-1)) && _vlen) \ - { *_vptr=(_vset); _vptr++; _vlen--; } \ - } while(0) -typedef struct fast_wipememory_s -{ - FASTWIPE_T a; -} fast_wipememory_t; +# define fast_wipememory2(_vptr,_vset,_vlen) #endif -/* fast_wipememory2 may leave tail bytes unhandled, in which case tail bytes - are handled by wipememory2. 
*/ -#define fast_wipememory2(_vptr,_vset,_vlen) do { \ - FASTWIPE_T _vset_long = _vset; \ - fast_wipememory2_unaligned_head(_vptr,_vset,_vlen); \ - if (_vlen < sizeof(FASTWIPE_T)) \ - break; \ - _vset_long *= FASTWIPE_MULT; \ - do { \ - volatile fast_wipememory_t *_vptr_long = \ - (volatile void *)_vptr; \ - _vptr_long->a = _vset_long; \ - _vlen -= sizeof(FASTWIPE_T); \ - _vptr += sizeof(FASTWIPE_T); \ - } while (_vlen >= sizeof(FASTWIPE_T)); \ - } while (0) - /* Digit predicates. */ diff --git a/src/misc.c b/src/misc.c index 47d2dc7..420ce74 100644 --- a/src/misc.c +++ b/src/misc.c @@ -32,6 +32,8 @@ static int verbosity_level = 0; +static void *(*volatile memset_ptr)(void *, int, size_t) = (void *)memset; + static void (*fatal_error_handler)(void*,int, const char*) = NULL; static void *fatal_error_handler_value = 0; static void (*log_handler)(void*,int, const char*, va_list) = NULL; @@ -498,22 +500,28 @@ _gcry_strtokenize (const char *string, const char *delim) void +_gcry_wipememory2 (void *ptr, int set, size_t len) +{ + memset_ptr (ptr, set, len); +} + + +void __gcry_burn_stack (unsigned int bytes) { #ifdef HAVE_VLA - static void *(*volatile memset_ptr)(void *, int, size_t) = (void *)memset; - /* (bytes == 0 ? 1 : bytes) == (!bytes + bytes) */ - unsigned int buflen = ((!bytes + bytes) + 63) & ~63; - char buf[buflen]; + /* (bytes == 0 ? 
1 : bytes) == (!bytes + bytes) */ + unsigned int buflen = ((!bytes + bytes) + 63) & ~63; + char buf[buflen]; - memset_ptr (buf, 0, sizeof buf); + memset_ptr (buf, 0, buflen); #else - volatile char buf[64]; + volatile char buf[64]; - wipememory (buf, sizeof buf); + wipememory (buf, sizeof buf); - if (bytes > sizeof buf) - _gcry_burn_stack (bytes - sizeof buf); + if (bytes > sizeof buf) + _gcry_burn_stack (bytes - sizeof buf); #endif } commit 0068d41d9304ebcdb2caba1fa8848925e2bfaac7 Author: Jussi Kivilinna Date: Mon Nov 5 20:42:58 2018 +0200 Change buf_cpy and buf_xor* functions to use buf_put/buf_get helpers * cipher/bufhelp.h (BUFHELP_FAST_UNALIGNED_ACCESS) (bufhelp_int_s, buf_xor_1): Remove. (buf_cpy, buf_xor, buf_xor_2dst, buf_xor_n_copy_2): Use buf_put/buf_get helpers to handle unaligned memory accesses. -- Signed-off-by: Jussi Kivilinna diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h index 4e97c4d..0e8f599 100644 --- a/cipher/bufhelp.h +++ b/cipher/bufhelp.h @@ -35,277 +35,6 @@ #endif -#undef BUFHELP_FAST_UNALIGNED_ACCESS -#if defined(BUFHELP_UNALIGNED_ACCESS) && \ - (defined(__i386__) || defined(__x86_64__) || \ - (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \ - defined(__aarch64__)) -/* These architectures are able of unaligned memory accesses and can - handle those fast. - */ -# define BUFHELP_FAST_UNALIGNED_ACCESS 1 -#endif - - -#ifdef BUFHELP_FAST_UNALIGNED_ACCESS -/* Define type with one-byte alignment on architectures with fast unaligned - memory accesses. - */ -typedef struct bufhelp_int_s -{ - uintptr_t a; -} __attribute__((packed, aligned(1), may_alias)) bufhelp_int_t; -#else -/* Define type with default alignment for other architectures (unaligned - accessed handled in per byte loops). 
- */ -#ifdef HAVE_GCC_ATTRIBUTE_MAY_ALIAS -typedef struct bufhelp_int_s -{ - uintptr_t a; -} __attribute__((may_alias)) bufhelp_int_t; -#else -typedef struct bufhelp_int_s -{ - uintptr_t a; -} bufhelp_int_t; -#endif -#endif - - -/* Optimized function for small buffer copying */ -static inline void -buf_cpy(void *_dst, const void *_src, size_t len) -{ -#if __GNUC__ >= 4 && (defined(__x86_64__) || defined(__i386__)) - /* For AMD64 and i386, memcpy is faster. */ - memcpy(_dst, _src, len); -#else - byte *dst = _dst; - const byte *src = _src; - bufhelp_int_t *ldst; - const bufhelp_int_t *lsrc; -#ifndef BUFHELP_FAST_UNALIGNED_ACCESS - const unsigned int longmask = sizeof(bufhelp_int_t) - 1; - - /* Skip fast processing if buffers are unaligned. */ - if (UNLIKELY(((uintptr_t)dst | (uintptr_t)src) & longmask)) - goto do_bytes; -#endif - - ldst = (bufhelp_int_t *)(void *)dst; - lsrc = (const bufhelp_int_t *)(const void *)src; - - for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t)) - (ldst++)->a = (lsrc++)->a; - - dst = (byte *)ldst; - src = (const byte *)lsrc; - -#ifndef BUFHELP_FAST_UNALIGNED_ACCESS -do_bytes: -#endif - /* Handle tail. */ - for (; len; len--) - *dst++ = *src++; -#endif /*__GNUC__ >= 4 && (__x86_64__ || __i386__)*/ -} - - -/* Optimized function for buffer xoring */ -static inline void -buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len) -{ - byte *dst = _dst; - const byte *src1 = _src1; - const byte *src2 = _src2; - bufhelp_int_t *ldst; - const bufhelp_int_t *lsrc1, *lsrc2; -#ifndef BUFHELP_FAST_UNALIGNED_ACCESS - const unsigned int longmask = sizeof(bufhelp_int_t) - 1; - - /* Skip fast processing if buffers are unaligned. 
*/ - if (UNLIKELY(((uintptr_t)dst | (uintptr_t)src1 | (uintptr_t)src2) & longmask)) - goto do_bytes; -#endif - - ldst = (bufhelp_int_t *)(void *)dst; - lsrc1 = (const bufhelp_int_t *)(const void *)src1; - lsrc2 = (const bufhelp_int_t *)(const void *)src2; - - for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t)) - (ldst++)->a = (lsrc1++)->a ^ (lsrc2++)->a; - - dst = (byte *)ldst; - src1 = (const byte *)lsrc1; - src2 = (const byte *)lsrc2; - -#ifndef BUFHELP_FAST_UNALIGNED_ACCESS -do_bytes: -#endif - /* Handle tail. */ - for (; len; len--) - *dst++ = *src1++ ^ *src2++; -} - - -/* Optimized function for in-place buffer xoring. */ -static inline void -buf_xor_1(void *_dst, const void *_src, size_t len) -{ - byte *dst = _dst; - const byte *src = _src; - bufhelp_int_t *ldst; - const bufhelp_int_t *lsrc; -#ifndef BUFHELP_FAST_UNALIGNED_ACCESS - const unsigned int longmask = sizeof(bufhelp_int_t) - 1; - - /* Skip fast processing if buffers are unaligned. */ - if (UNLIKELY(((uintptr_t)dst | (uintptr_t)src) & longmask)) - goto do_bytes; -#endif - - ldst = (bufhelp_int_t *)(void *)dst; - lsrc = (const bufhelp_int_t *)(const void *)src; - - for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t)) - (ldst++)->a ^= (lsrc++)->a; - - dst = (byte *)ldst; - src = (const byte *)lsrc; - -#ifndef BUFHELP_FAST_UNALIGNED_ACCESS -do_bytes: -#endif - /* Handle tail. */ - for (; len; len--) - *dst++ ^= *src++; -} - - -/* Optimized function for buffer xoring with two destination buffers. Used - mainly by CFB mode encryption. */ -static inline void -buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len) -{ - byte *dst1 = _dst1; - byte *dst2 = _dst2; - const byte *src = _src; - bufhelp_int_t *ldst1, *ldst2; - const bufhelp_int_t *lsrc; -#ifndef BUFHELP_FAST_UNALIGNED_ACCESS - const unsigned int longmask = sizeof(bufhelp_int_t) - 1; - - /* Skip fast processing if buffers are unaligned. 
*/ - if (UNLIKELY(((uintptr_t)src | (uintptr_t)dst1 | (uintptr_t)dst2) & longmask)) - goto do_bytes; -#endif - - ldst1 = (bufhelp_int_t *)(void *)dst1; - ldst2 = (bufhelp_int_t *)(void *)dst2; - lsrc = (const bufhelp_int_t *)(const void *)src; - - for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t)) - (ldst1++)->a = ((ldst2++)->a ^= (lsrc++)->a); - - dst1 = (byte *)ldst1; - dst2 = (byte *)ldst2; - src = (const byte *)lsrc; - -#ifndef BUFHELP_FAST_UNALIGNED_ACCESS -do_bytes: -#endif - /* Handle tail. */ - for (; len; len--) - *dst1++ = (*dst2++ ^= *src++); -} - - -/* Optimized function for combined buffer xoring and copying. Used by mainly - CBC mode decryption. */ -static inline void -buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy, - const void *_src_cpy, size_t len) -{ - byte *dst_xor = _dst_xor; - byte *srcdst_cpy = _srcdst_cpy; - const byte *src_xor = _src_xor; - const byte *src_cpy = _src_cpy; - byte temp; - bufhelp_int_t *ldst_xor, *lsrcdst_cpy; - const bufhelp_int_t *lsrc_cpy, *lsrc_xor; - uintptr_t ltemp; -#ifndef BUFHELP_FAST_UNALIGNED_ACCESS - const unsigned int longmask = sizeof(bufhelp_int_t) - 1; - - /* Skip fast processing if buffers are unaligned. */ - if (UNLIKELY(((uintptr_t)src_cpy | (uintptr_t)src_xor | (uintptr_t)dst_xor | - (uintptr_t)srcdst_cpy) & longmask)) - goto do_bytes; -#endif - - ldst_xor = (bufhelp_int_t *)(void *)dst_xor; - lsrc_xor = (const bufhelp_int_t *)(void *)src_xor; - lsrcdst_cpy = (bufhelp_int_t *)(void *)srcdst_cpy; - lsrc_cpy = (const bufhelp_int_t *)(const void *)src_cpy; - - for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t)) - { - ltemp = (lsrc_cpy++)->a; - (ldst_xor++)->a = (lsrcdst_cpy)->a ^ (lsrc_xor++)->a; - (lsrcdst_cpy++)->a = ltemp; - } - - dst_xor = (byte *)ldst_xor; - src_xor = (const byte *)lsrc_xor; - srcdst_cpy = (byte *)lsrcdst_cpy; - src_cpy = (const byte *)lsrc_cpy; - -#ifndef BUFHELP_FAST_UNALIGNED_ACCESS -do_bytes: -#endif - /* Handle tail. 
*/ - for (; len; len--) - { - temp = *src_cpy++; - *dst_xor++ = *srcdst_cpy ^ *src_xor++; - *srcdst_cpy++ = temp; - } -} - - -/* Optimized function for combined buffer xoring and copying. Used by mainly - CFB mode decryption. */ -static inline void -buf_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src, size_t len) -{ - buf_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, len); -} - - -/* Constant-time compare of two buffers. Returns 1 if buffers are equal, - and 0 if buffers differ. */ -static inline int -buf_eq_const(const void *_a, const void *_b, size_t len) -{ - const byte *a = _a; - const byte *b = _b; - int ab, ba; - size_t i; - - /* Constant-time compare. */ - for (i = 0, ab = 0, ba = 0; i < len; i++) - { - /* If a[i] != b[i], either ab or ba will be negative. */ - ab |= a[i] - b[i]; - ba |= b[i] - a[i]; - } - - /* 'ab | ba' is negative when buffers are not equal. */ - return (ab | ba) >= 0; -} - - #ifndef BUFHELP_UNALIGNED_ACCESS /* Functions for loading and storing unaligned u32 values of different @@ -467,4 +196,188 @@ static inline void buf_put_le64(void *_buf, u64 val) #endif + +/* Optimized function for small buffer copying */ +static inline void +buf_cpy(void *_dst, const void *_src, size_t len) +{ + byte *dst = _dst; + const byte *src = _src; + +#if __GNUC__ >= 4 + if (!__builtin_constant_p (len)) + { + memcpy(_dst, _src, len); + return; + } +#endif + + while (len >= sizeof(u64)) + { + buf_put_he64(dst, buf_get_he64(src)); + dst += sizeof(u64); + src += sizeof(u64); + len -= sizeof(u64); + } + + if (len >= sizeof(u32)) + { + buf_put_he32(dst, buf_get_he32(src)); + dst += sizeof(u32); + src += sizeof(u32); + len -= sizeof(u32); + } + + /* Handle tail. 
*/ + for (; len; len--) + *dst++ = *src++; +} + + +/* Optimized function for buffer xoring */ +static inline void +buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len) +{ + byte *dst = _dst; + const byte *src1 = _src1; + const byte *src2 = _src2; + + while (len >= sizeof(u64)) + { + buf_put_he64(dst, buf_get_he64(src1) ^ buf_get_he64(src2)); + dst += sizeof(u64); + src1 += sizeof(u64); + src2 += sizeof(u64); + len -= sizeof(u64); + } + + if (len > sizeof(u32)) + { + buf_put_he32(dst, buf_get_he32(src1) ^ buf_get_he32(src2)); + dst += sizeof(u32); + src1 += sizeof(u32); + src2 += sizeof(u32); + len -= sizeof(u32); + } + + /* Handle tail. */ + for (; len; len--) + *dst++ = *src1++ ^ *src2++; +} + + +/* Optimized function for buffer xoring with two destination buffers. Used + mainly by CFB mode encryption. */ +static inline void +buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len) +{ + byte *dst1 = _dst1; + byte *dst2 = _dst2; + const byte *src = _src; + + while (len >= sizeof(u64)) + { + u64 temp = buf_get_he64(dst2) ^ buf_get_he64(src); + buf_put_he64(dst2, temp); + buf_put_he64(dst1, temp); + dst2 += sizeof(u64); + dst1 += sizeof(u64); + src += sizeof(u64); + len -= sizeof(u64); + } + + if (len >= sizeof(u32)) + { + u32 temp = buf_get_he32(dst2) ^ buf_get_he32(src); + buf_put_he32(dst2, temp); + buf_put_he32(dst1, temp); + dst2 += sizeof(u32); + dst1 += sizeof(u32); + src += sizeof(u32); + len -= sizeof(u32); + } + + /* Handle tail. */ + for (; len; len--) + *dst1++ = (*dst2++ ^= *src++); +} + + +/* Optimized function for combined buffer xoring and copying. Used by mainly + CBC mode decryption. 
*/ +static inline void +buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy, + const void *_src_cpy, size_t len) +{ + byte *dst_xor = _dst_xor; + byte *srcdst_cpy = _srcdst_cpy; + const byte *src_xor = _src_xor; + const byte *src_cpy = _src_cpy; + + while (len >= sizeof(u64)) + { + u64 temp = buf_get_he64(src_cpy); + buf_put_he64(dst_xor, buf_get_he64(srcdst_cpy) ^ buf_get_he64(src_xor)); + buf_put_he64(srcdst_cpy, temp); + dst_xor += sizeof(u64); + srcdst_cpy += sizeof(u64); + src_xor += sizeof(u64); + src_cpy += sizeof(u64); + len -= sizeof(u64); + } + + if (len >= sizeof(u32)) + { + u32 temp = buf_get_he32(src_cpy); + buf_put_he32(dst_xor, buf_get_he32(srcdst_cpy) ^ buf_get_he32(src_xor)); + buf_put_he32(srcdst_cpy, temp); + dst_xor += sizeof(u32); + srcdst_cpy += sizeof(u32); + src_xor += sizeof(u32); + src_cpy += sizeof(u32); + len -= sizeof(u32); + } + + /* Handle tail. */ + for (; len; len--) + { + byte temp = *src_cpy++; + *dst_xor++ = *srcdst_cpy ^ *src_xor++; + *srcdst_cpy++ = temp; + } +} + + +/* Optimized function for combined buffer xoring and copying. Used by mainly + CFB mode decryption. */ +static inline void +buf_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src, size_t len) +{ + buf_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, len); +} + + +/* Constant-time compare of two buffers. Returns 1 if buffers are equal, + and 0 if buffers differ. */ +static inline int +buf_eq_const(const void *_a, const void *_b, size_t len) +{ + const byte *a = _a; + const byte *b = _b; + int ab, ba; + size_t i; + + /* Constant-time compare. */ + for (i = 0, ab = 0, ba = 0; i < len; i++) + { + /* If a[i] != b[i], either ab or ba will be negative. */ + ab |= a[i] - b[i]; + ba |= b[i] - a[i]; + } + + /* 'ab | ba' is negative when buffers are not equal. 
*/ + return (ab | ba) >= 0; +} + + #endif /*GCRYPT_BUFHELP_H*/ commit 30e783ec487466132324673f197d36b85a91b060 Author: Jussi Kivilinna Date: Mon Nov 5 20:42:58 2018 +0200 rijndael: fix unused parameter warning * cipher/rijndael.c (do_setkey): Silence unused 'hd' warning. -- This commit fixes "warning: unused parameter 'hd'" warning seen on architectures that do not have alternative AES implementations. Signed-off-by: Jussi Kivilinna diff --git a/cipher/rijndael.c b/cipher/rijndael.c index d126f88..1bc8b0f 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -264,6 +264,8 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, unsigned int hwfeatures; #endif + (void)hd; + /* The on-the-fly self tests are only run in non-fips mode. In fips mode explicit self-tests are required. Actually the on-the-fly self-tests are not fully thread-safe and it might happen that a commit ec49013d23d9a7b874c42d77ceb08bd313ba69e1 Author: Jussi Kivilinna Date: Mon Nov 5 20:42:58 2018 +0200 mpi/longlong.h: enable inline assembly for powerpc64 * mpi/longlong.h [__powerpc__ && W_TYPE_SIZE == 64]: Remove '#if 0'. -- PowerPC64 inline assembly was tested on QEMU ('make check' pass). Signed-off-by: Jussi Kivilinna diff --git a/mpi/longlong.h b/mpi/longlong.h index d6958f3..c0f24c8 100644 --- a/mpi/longlong.h +++ b/mpi/longlong.h @@ -1088,7 +1088,6 @@ typedef unsigned int UTItype __attribute__ ((mode (TI))); /* Powerpc 64 bit support taken from gmp-4.1.2. */ /* We should test _IBMR2 here when we add assembly support for the system vendor compilers. */ -#if 0 /* Not yet enabled because we don't have hardware for a test. */ #if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ do { \ @@ -1141,7 +1140,6 @@ typedef unsigned int UTItype __attribute__ ((mode (TI))); #define SMUL_TIME 14 /* ??? */ #define UDIV_TIME 120 /* ??? */ #endif /* 64-bit PowerPC. 
*/ -#endif /* if 0 */ /*************************************** ************** PYR ****************** commit 2aece89d3967e692743541cea857f2e4771b0b62 Author: Jussi Kivilinna Date: Mon Nov 5 20:42:58 2018 +0200 Change remaining users of _gcry_fips_mode to use fips_mode * src/fips.c (_gcry_fips_mode): Remove. (_gcry_enforced_fips_mode, _gcry_inactivate_fips_mode) (_gcry_is_fips_mode_inactive): Use fips_mode. * src/g10lib.h (_gcry_fips_mode): Remove. -- Signed-off-by: Jussi Kivilinna diff --git a/src/fips.c b/src/fips.c index 2b3a0af..36358bf 100644 --- a/src/fips.c +++ b/src/fips.c @@ -255,25 +255,11 @@ unlock_fsm (void) } -/* This function returns true if fips mode is enabled. This is - independent of the fips required finite state machine and only used - to enable fips specific code. Please use the fips_mode macro - instead of calling this function directly. */ -int -_gcry_fips_mode (void) -{ - /* No locking is required because we have the requirement that this - variable is only initialized once with no other threads - existing. */ - return !_gcry_no_fips_mode_required; -} - - /* Return a flag telling whether we are in the enforced fips mode. */ int _gcry_enforced_fips_mode (void) { - if (!_gcry_fips_mode ()) + if (!fips_mode ()) return 0; return enforced_fips_mode; } @@ -292,7 +278,7 @@ _gcry_set_enforced_fips_mode (void) void _gcry_inactivate_fips_mode (const char *text) { - gcry_assert (_gcry_fips_mode ()); + gcry_assert (fips_mode ()); if (_gcry_enforced_fips_mode () ) { @@ -323,7 +309,7 @@ _gcry_is_fips_mode_inactive (void) { int flag; - if (!_gcry_fips_mode ()) + if (!fips_mode ()) return 0; lock_fsm (); flag = inactive_fips_mode; diff --git a/src/g10lib.h b/src/g10lib.h index c1f84ee..c64cbcf 100644 --- a/src/g10lib.h +++ b/src/g10lib.h @@ -427,8 +427,6 @@ extern int _gcry_no_fips_mode_required; void _gcry_initialize_fips_mode (int force); -int _gcry_fips_mode (void); - /* This macro returns true if fips mode is enabled. 
This is independent of the fips required finite state machine and only used to enable fips specific code. ----------------------------------------------------------------------- Summary of changes: cipher/bufhelp.h | 455 ++++++++++++++++++++++-------------------------------- cipher/rijndael.c | 2 + configure.ac | 15 ++ mpi/longlong.h | 2 - src/fips.c | 20 +-- src/g10lib.h | 88 +++++------ src/misc.c | 26 ++-- src/secmem.c | 43 +++++- src/stdmem.c | 20 ++- 9 files changed, 309 insertions(+), 362 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From yann.garcia at fscom.fr Mon Nov 5 12:07:37 2018 From: yann.garcia at fscom.fr (Yann Garcia) Date: Mon, 5 Nov 2018 12:07:37 +0100 Subject: libgcrypt: Elliptic Curve Points Compact Representation Message-ID: Dear All, I'm currently developing a Wireshark dissector for protocols based on IEEE 1609.2 standard. This standard uses extensively the canonical form which is defined by using compact representation of public x,y keys. My trouble is how can I retrieve the private and uncompressed public keys when only the y key sign (LSB bit is 0 or 1) and the x public key is provided? NOTE: The Nist P-256 ECC curve is used. Please may I ask you some hints to address my trouble? Many thanks in advance for your help, Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)6 68 94 57 76 Email: *yann.garcia at fscom.fr * Skype: yann.garcia Google+: garcia.yann at gmail.com -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From gniibe at fsij.org Tue Nov 6 00:55:22 2018 From: gniibe at fsij.org (NIIBE Yutaka) Date: Tue, 06 Nov 2018 08:55:22 +0900 Subject: libgcrypt: Elliptic Curve Points Compact Representation In-Reply-To: References: Message-ID: <87in1b13md.fsf@fsij.org> Hello, I don't know anything about IEEE 1609.2, so, my explanation may be completely wrong... Yann Garcia wrote: > This standard uses extensively the canonical form which is defined by using > compact representation of public x,y keys. > > My trouble is how can I retrieve the private and uncompressed public keys > when only the y key sign (LSB bit is 0 or 1) and the x public key is > provided? > > NOTE: The Nist P-256 ECC curve is used. The appropriate Weierstrass equation can determine Y. It's: y^2 = x^3 + a*x + b Given x, you can compute x^3 + a*x + b, which should be y^2, then, in the range of (-p,p) there are two values for such y (you can get one by sqrt function). Among two, you can choose y by sign information. In the context of libgcrypt, we adopt the technique for choosing y with no sign information: https://www.ietf.org/archive/id/draft-jivsov-ecc-compact-05.txt And... for detail, this document helps, I suppose. -- From yann.garcia at fscom.fr Tue Nov 6 07:02:55 2018 From: yann.garcia at fscom.fr (Yann Garcia) Date: Tue, 6 Nov 2018 07:02:55 +0100 Subject: libgcrypt: Elliptic Curve Points Compact Representation In-Reply-To: <87in1b13md.fsf@fsij.org> References: <87in1b13md.fsf@fsij.org> Message-ID: Hello Gniibe, Many thanks for the link.
Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)6 68 94 57 76 Email: *yann.garcia at fscom.fr * Skype: yann.garcia Google+: garcia.yann at gmail.com On Tue, 6 Nov 2018 at 00:55, NIIBE Yutaka wrote: > Hello, > > I don't know any about IEEE 1609.2, so, my explanation may be completely > wrong... > > Yann Garcia wrote: > > This standard uses extensively the canonical form which is defined by > using > > compact representation of public x,y keys. > > > > My trouble is how can I retrieve the private and uncompressed public keys > > when only the y key sign (LSB bit is 0 or 1) and the x public key is > > provided? > > > > NOTE: The Nist P-256 ECC curve is used. > > The appropriate Weierstrass equation can determince Y. It's: > > y^2 = x^3 + a*x + b > > Given x, you can compute x^3 + a*x + b, which should be y^2, then, in > the range of (-p,p) there are two values for such y (you can get one by > sqrt function). Among two, you can choice y by sign information. > > In the context of libgcrypt, we adopt the technique for > choosing y with no sign information: > > https://www.ietf.org/archive/id/draft-jivsov-ecc-compact-05.txt > > And... for detail, this document helps, I suppose. > -- > -------------- next part -------------- An HTML attachment was scrubbed... URL: From jussi.kivilinna at iki.fi Tue Nov 6 20:31:49 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Tue, 6 Nov 2018 21:31:49 +0200 Subject: [PATCH] Fix inlining of ocb_get_l for x86 AES implementations Message-ID: <154153270977.25712.7349111870296367798.stgit@localhost.localdomain> * cipher/rijndael-aesni.c (aes_ocb_get_l): New. (aesni_ocb_enc, aesni_ocb_dec, _gcry_aes_aesni_ocb_auth): Use 'aes_ocb_get_l'. * cipher/rijndael-ssse3-amd4.c (aes_ocb_get_l): New. 
(ssse3_ocb_enc, ssse3_ocb_dec, _gcry_aes_ssse3_ocb_auth): Use 'aes_ocb_get_l'. -- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c index e7e61ca8a..d190c0ac4 100644 --- a/cipher/rijndael-aesni.c +++ b/cipher/rijndael-aesni.c @@ -47,6 +47,23 @@ typedef struct u128_s } __attribute__((packed, aligned(1), may_alias)) u128_t; +/* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l + because of 'pragma target'. */ +static inline const unsigned char * +aes_ocb_get_l (gcry_cipher_hd_t c, u64 n) +{ + unsigned long ntz; + + /* Assumes that N != 0. */ + asm ("rep;bsfl %k[low], %k[ntz]\n\t" + : [ntz] "=r" (ntz) + : [low] "r" ((unsigned long)n) + : "cc"); + + return c->u_mode.ocb.L[ntz]; +} + + /* Two macros to be called prior and after the use of AESNI instructions. There should be no external function calls between the use of these macros. There purpose is to make sure that the @@ -2199,7 +2216,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks && n % 4; nblocks-- ) { - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ @@ -2241,7 +2258,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ @@ -2285,7 +2302,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, : "memory" ); n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); asm volatile ("movdqu %[inbuf4], %%xmm8\n\t" "pxor %%xmm7, %%xmm5\n\t" @@ -2364,7 +2381,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks >= 4 ; nblocks -= 4 ) { n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ @@ -2433,7 +2450,7 @@ aesni_ocb_enc 
(gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks; nblocks-- ) { - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ @@ -2503,7 +2520,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks && n % 4; nblocks-- ) { - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ @@ -2545,7 +2562,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ @@ -2585,7 +2602,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, : "memory" ); n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); asm volatile ("movdqu %[inbuf4], %%xmm8\n\t" "pxor %%xmm7, %%xmm5\n\t" @@ -2668,7 +2685,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks >= 4 ; nblocks -= 4 ) { n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ @@ -2737,7 +2754,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks; nblocks-- ) { - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ @@ -2813,7 +2830,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, for ( ;nblocks && n % 4; nblocks-- ) { - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ @@ -2853,7 +2870,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} 
xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ @@ -2885,7 +2902,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, : "memory" ); n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); asm volatile ("movdqu %[abuf4], %%xmm8\n\t" "pxor %%xmm7, %%xmm5\n\t" @@ -2938,7 +2955,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, for ( ;nblocks >= 4 ; nblocks -= 4 ) { n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ @@ -2989,7 +3006,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, for ( ;nblocks; nblocks-- ) { - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c index 07a64a4c1..fa481bb4f 100644 --- a/cipher/rijndael-ssse3-amd64.c +++ b/cipher/rijndael-ssse3-amd64.c @@ -57,6 +57,23 @@ #endif +/* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l + because of 'pragma target'. */ +static inline const unsigned char * +aes_ocb_get_l (gcry_cipher_hd_t c, u64 n) +{ + unsigned long ntz; + + /* Assumes that N != 0. */ + asm ("rep;bsfl %k[low], %k[ntz]\n\t" + : [ntz] "=r" (ntz) + : [low] "r" ((unsigned long)n) + : "cc"); + + return c->u_mode.ocb.L[ntz]; +} + + /* Assembly functions in rijndael-ssse3-amd64-asm.S. Note that these have custom calling convention (additional XMM parameters). 
*/ extern void _gcry_aes_ssse3_enc_preload(void); @@ -528,7 +545,7 @@ ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, { const unsigned char *l; - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ @@ -597,7 +614,7 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, { const unsigned char *l; - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ @@ -673,7 +690,7 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, { const unsigned char *l; - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ From yann.garcia at fscom.fr Wed Nov 7 11:25:01 2018 From: yann.garcia at fscom.fr (Yann Garcia) Date: Wed, 7 Nov 2018 11:25:01 +0100 Subject: libgcrypt: Elliptic Curve Points Compact Representation In-Reply-To: References: <87in1b13md.fsf@fsij.org> Message-ID: Hello Niibe, I take a look to the link and in fact I want to do the opposite: I have an ephemeral encryption key such as: compressed_y_1 := 'CF2A7D7467F217A6B7AEF4C34452A4C62FEDA99C1E1EDEB740F662841B84D394'O compressed_y_1 means that the LSB bit of Y in 1. Knowing that I'm working with NIST P-256 elliptic curve, I want to calculate first Y^2. To do it: I wrote this code: /* y^2=x^3+a*x+b */ three = gcry_mpi_set_ui (NULL, 3); x_3 = gcry_mpi_new (0); axb = gcry_mpi_new (0); y_2 = gcry_mpi_new (0); gcry_mpi_powm (x_3, x, three, p); // w = b^e \bmod m. gcry_mpi_mulm (axb, a, x, p); gcry_mpi_addm (axb, axb, b, p); gcry_mpi_addm (y_2, x_3, axb, p); show_mpi("y_2", "", y_2); Where a, b, p are parameters from the Nist P-256 elliptic curve. My concerns now is how to get the two possible values of y? 
Many thanks in advance for your help, Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)6 68 94 57 76 Email: *yann.garcia at fscom.fr * Skype: yann.garcia Google+: garcia.yann at gmail.com On Tue, 6 Nov 2018 at 07:02, Yann Garcia wrote: > Hello Gniibe, > > Many thanks for the link. > > Best regards, > > Yann Garcia > Senior Software Engineer > Microsoft MCAD.net Certified > ************************************** > FSCOM SARL > Le Montespan B2 > 6, > > Avenue des Alpes > > > F-06600 Antibes, FRANCE > ************************************************ > Tel: +33 (0)4 92 94 49 08 > Mobile: +33 (0)6 68 94 57 76 > Email: *yann.garcia at fscom.fr * > Skype: yann.garcia > Google+: garcia.yann at gmail.com > > > On Tue, 6 Nov 2018 at 00:55, NIIBE Yutaka wrote: > >> Hello, >> >> I don't know any about IEEE 1609.2, so, my explanation may be completely >> wrong... >> >> Yann Garcia wrote: >> > This standard uses extensively the canonical form which is defined by >> using >> > compact representation of public x,y keys. >> > >> > My trouble is how can I retrieve the private and uncompressed public >> keys >> > when only the y key sign (LSB bit is 0 or 1) and the x public key is >> > provided? >> > >> > NOTE: The Nist P-256 ECC curve is used. >> >> The appropriate Weierstrass equation can determince Y. It's: >> >> y^2 = x^3 + a*x + b >> >> Given x, you can compute x^3 + a*x + b, which should be y^2, then, in >> the range of (-p,p) there are two values for such y (you can get one by >> sqrt function). Among two, you can choice y by sign information. >> >> In the context of libgcrypt, we adopt the technique for >> choosing y with no sign information: >> >> https://www.ietf.org/archive/id/draft-jivsov-ecc-compact-05.txt >> >> And...
for detail, this document helps, I suppose. >> -- >> > -------------- next part -------------- An HTML attachment was scrubbed... URL: From jussi.kivilinna at iki.fi Wed Nov 7 18:16:43 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Wed, 7 Nov 2018 19:16:43 +0200 Subject: [PATCH] Fix undefined variable in CTR mode Message-ID: <154161100373.29391.9192094244370805798.stgit@localhost.localdomain> * cipher/cipher-ctr.c (_gcry_cipher_ctr_encrypt): Set N to BLOCKSIZE before counter loop. -- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/cipher/cipher-ctr.c b/cipher/cipher-ctr.c index 2df0b8c3e..546d4f8e6 100644 --- a/cipher/cipher-ctr.c +++ b/cipher/cipher-ctr.c @@ -77,6 +77,7 @@ _gcry_cipher_ctr_encrypt (gcry_cipher_hd_t c, { unsigned char tmp[MAX_BLOCKSIZE]; + n = blocksize; do { nburn = enc_fn (&c->context.c, tmp, c->u_ctr.ctr); @@ -91,7 +92,6 @@ _gcry_cipher_ctr_encrypt (gcry_cipher_hd_t c, if (inbuflen < blocksize) break; - n = blocksize; cipher_block_xor(outbuf, inbuf, tmp, blocksize); inbuflen -= n; From cvs at cvs.gnupg.org Wed Nov 7 19:56:00 2018 From: cvs at cvs.gnupg.org (by Jussi Kivilinna) Date: Wed, 07 Nov 2018 19:56:00 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-125-g3f76319 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 3f76319803a4abcd33fa29a0ac39f8ed9d646226 (commit) via 9d6431604b5ee21572c1c2cfa8376e6d81162cbb (commit) from 23f56d3359ca7d152aa87874ddd6305171a91408 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. 
- Log ----------------------------------------------------------------- commit 3f76319803a4abcd33fa29a0ac39f8ed9d646226 Author: Jussi Kivilinna Date: Wed Nov 7 19:12:29 2018 +0200 Fix 'variable may be used uninitialized' warning for CTR mode * cipher/cipher-ctr.c (_gcry_cipher_ctr_encrypt): Set N to BLOCKSIZE before counter loop. -- Signed-off-by: Jussi Kivilinna diff --git a/cipher/cipher-ctr.c b/cipher/cipher-ctr.c index 2df0b8c..546d4f8 100644 --- a/cipher/cipher-ctr.c +++ b/cipher/cipher-ctr.c @@ -77,6 +77,7 @@ _gcry_cipher_ctr_encrypt (gcry_cipher_hd_t c, { unsigned char tmp[MAX_BLOCKSIZE]; + n = blocksize; do { nburn = enc_fn (&c->context.c, tmp, c->u_ctr.ctr); @@ -91,7 +92,6 @@ _gcry_cipher_ctr_encrypt (gcry_cipher_hd_t c, if (inbuflen < blocksize) break; - n = blocksize; cipher_block_xor(outbuf, inbuf, tmp, blocksize); inbuflen -= n; commit 9d6431604b5ee21572c1c2cfa8376e6d81162cbb Author: Jussi Kivilinna Date: Tue Nov 6 20:27:34 2018 +0200 Fix inlining of ocb_get_l for x86 AES implementations * cipher/rijndael-aesni.c (aes_ocb_get_l): New. (aesni_ocb_enc, aesni_ocb_dec, _gcry_aes_aesni_ocb_auth): Use 'aes_ocb_get_l'. * cipher/rijndael-ssse3-amd4.c (aes_ocb_get_l): New. (ssse3_ocb_enc, ssse3_ocb_dec, _gcry_aes_ssse3_ocb_auth): Use 'aes_ocb_get_l'. -- Signed-off-by: Jussi Kivilinna diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c index e7e61ca..d190c0a 100644 --- a/cipher/rijndael-aesni.c +++ b/cipher/rijndael-aesni.c @@ -47,6 +47,23 @@ typedef struct u128_s } __attribute__((packed, aligned(1), may_alias)) u128_t; +/* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l + because of 'pragma target'. */ +static inline const unsigned char * +aes_ocb_get_l (gcry_cipher_hd_t c, u64 n) +{ + unsigned long ntz; + + /* Assumes that N != 0. 
*/ + asm ("rep;bsfl %k[low], %k[ntz]\n\t" + : [ntz] "=r" (ntz) + : [low] "r" ((unsigned long)n) + : "cc"); + + return c->u_mode.ocb.L[ntz]; +} + + /* Two macros to be called prior and after the use of AESNI instructions. There should be no external function calls between the use of these macros. There purpose is to make sure that the @@ -2199,7 +2216,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks && n % 4; nblocks-- ) { - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ @@ -2241,7 +2258,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ @@ -2285,7 +2302,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, : "memory" ); n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); asm volatile ("movdqu %[inbuf4], %%xmm8\n\t" "pxor %%xmm7, %%xmm5\n\t" @@ -2364,7 +2381,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks >= 4 ; nblocks -= 4 ) { n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ @@ -2433,7 +2450,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks; nblocks-- ) { - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ @@ -2503,7 +2520,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks && n % 4; nblocks-- ) { - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ @@ -2545,7 +2562,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; - l = ocb_get_l(c, n); + l = 
aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ @@ -2585,7 +2602,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, : "memory" ); n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); asm volatile ("movdqu %[inbuf4], %%xmm8\n\t" "pxor %%xmm7, %%xmm5\n\t" @@ -2668,7 +2685,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks >= 4 ; nblocks -= 4 ) { n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ @@ -2737,7 +2754,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, for ( ;nblocks; nblocks-- ) { - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ @@ -2813,7 +2830,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, for ( ;nblocks && n % 4; nblocks-- ) { - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ @@ -2853,7 +2870,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ @@ -2885,7 +2902,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, : "memory" ); n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); asm volatile ("movdqu %[abuf4], %%xmm8\n\t" "pxor %%xmm7, %%xmm5\n\t" @@ -2938,7 +2955,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, for ( ;nblocks >= 4 ; nblocks -= 4 ) { n += 4; - l = ocb_get_l(c, n); + l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ @@ -2989,7 +3006,7 @@ 
_gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, for ( ;nblocks; nblocks-- ) { - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c index 07a64a4..fa481bb 100644 --- a/cipher/rijndael-ssse3-amd64.c +++ b/cipher/rijndael-ssse3-amd64.c @@ -57,6 +57,23 @@ #endif +/* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l + because of 'pragma target'. */ +static inline const unsigned char * +aes_ocb_get_l (gcry_cipher_hd_t c, u64 n) +{ + unsigned long ntz; + + /* Assumes that N != 0. */ + asm ("rep;bsfl %k[low], %k[ntz]\n\t" + : [ntz] "=r" (ntz) + : [low] "r" ((unsigned long)n) + : "cc"); + + return c->u_mode.ocb.L[ntz]; +} + + /* Assembly functions in rijndael-ssse3-amd64-asm.S. Note that these have custom calling convention (additional XMM parameters). */ extern void _gcry_aes_ssse3_enc_preload(void); @@ -528,7 +545,7 @@ ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, { const unsigned char *l; - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ @@ -597,7 +614,7 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, { const unsigned char *l; - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ @@ -673,7 +690,7 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, { const unsigned char *l; - l = ocb_get_l(c, ++n); + l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ ----------------------------------------------------------------------- Summary of changes: cipher/cipher-ctr.c | 2 +- cipher/rijndael-aesni.c | 47 +++++++++++++++++++++++++++++-------------- 
cipher/rijndael-ssse3-amd64.c | 23 ++++++++++++++++++--- 3 files changed, 53 insertions(+), 19 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From garcia.yann at gmail.com Fri Nov 9 09:02:34 2018 From: garcia.yann at gmail.com (Yann Garcia) Date: Fri, 9 Nov 2018 09:02:34 +0100 Subject: Nist P256: How to calculate Y public key when knowing X and the LSB bit of Y public key Message-ID: Dear All, I was requested to write a dissector for Wireshark where I need to decrypt data. I have the ephemeral X public key provided in compressed format. This means: compressed_y_1 := 'CF2A7D7467F217A6B7AEF4C34452A4C62FEDA99C1E1EDEB740F662841B84D394'O compressed_y_1 means that the LSB bit of Y in 1. I know I have t use Nist P256 (or Brainpool P256 r1). My problem is to calculate Y based on previous information. First I calculated Y^2: /* y^2=x^3+a*x+b */ three = gcry_mpi_set_ui (NULL, 3); x_3 = gcry_mpi_new (0); axb = gcry_mpi_new (0); y_2 = gcry_mpi_new (0); gcry_mpi_powm (x_3, x, three, p); // w = b^e \bmod m. gcry_mpi_mulm (axb, a, x, p); gcry_mpi_addm (axb, axb, b, p); gcry_mpi_addm (y_2, x_3, axb, p); show_mpi("y_2", "", y_2); Where a, b, p are parameters from the Nist P-256 elliptic curve. My concerns now is how to get the two possible values of y? In other words, how can I calculate sqrt(y^2). I did some research on the Internet, but what I found is too much Mathematical oriented and I was lost. I need something close than an algorithm I can implement with libgcrypt MIP library. Please can anyone provide me to some hints? Many thanks in advance for your help, Best regards, Yann Garcia -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From gniibe at fsij.org Fri Nov 9 13:50:19 2018 From: gniibe at fsij.org (NIIBE Yutaka) Date: Fri, 09 Nov 2018 21:50:19 +0900 Subject: Nist P256: How to calculate Y public key when knowing X and the LSB bit of Y public key In-Reply-To: References: Message-ID: <871s7ujtys.fsf@fsij.org> Yann Garcia wrote: > My problem is to calculate Y based on previous information. This page helps: https://en.wikipedia.org/wiki/Quadratic_residue > My concerns now is how to get the two possible values of y? In other words, > how can I calculate sqrt(y^2). If it exists, according to the page above, when p_plus_1_div_4 has (p+1)/4, you can calculate it by gcry_mpi_powm (y, y_2, p_plus_1_div_4, p) You can see if it exists or not by Euler's criterion: gcry_mpi_powm (eulers_criterion, y_2, p_minus_1_div_2, p) Here, p_minus_1_div_2 should have (p-1)/2. When eulers_criterion is 1, it does exist. When eulers_criterion is p-1 (-1 mod p), it doesn't exist. -- From jussi.kivilinna at iki.fi Sun Nov 11 14:41:24 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 11 Nov 2018 15:41:24 +0200 Subject: [PATCH] Add clang target pragma for mixed C/assembly x86-64 implementations Message-ID: <154194368472.30658.18132085330003367734.stgit@localhost.localdomain> * cipher/cipher-gcm-intel-pclmul.c: Add target 'no-sse' attribute pragma for clang. * cipher/crc-intel-pclmul.c: Ditto. * cipher/rijndael-aesni.c: Ditto. * cipher/rijndael-ssse3-amd64.c: Ditto. * cipher/sha1-intel-shaext.c: Ditto. * cipher/sha256-intel-shaext.c: Ditto. -- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/cipher/cipher-gcm-intel-pclmul.c b/cipher/cipher-gcm-intel-pclmul.c index 0f26277a5..60ae7aa9a 100644 --- a/cipher/cipher-gcm-intel-pclmul.c +++ b/cipher/cipher-gcm-intel-pclmul.c @@ -37,6 +37,9 @@ /* Prevent compiler from issuing SSE instructions between asm blocks. 
*/ # pragma GCC target("no-sse") #endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif /* @@ -474,4 +477,8 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf, return 0; } +#if __clang__ +# pragma clang attribute pop +#endif + #endif /* GCM_USE_INTEL_PCLMUL */ diff --git a/cipher/crc-intel-pclmul.c b/cipher/crc-intel-pclmul.c index 8ff08ec10..482b260bf 100644 --- a/cipher/crc-intel-pclmul.c +++ b/cipher/crc-intel-pclmul.c @@ -39,6 +39,9 @@ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif #define ALIGNED_16 __attribute__ ((aligned (16))) @@ -922,4 +925,8 @@ _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen) #endif } +#if __clang__ +# pragma clang attribute pop +#endif + #endif /* USE_INTEL_PCLMUL */ diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c index 081bf124c..c29be50e5 100644 --- a/cipher/rijndael-aesni.c +++ b/cipher/rijndael-aesni.c @@ -39,6 +39,9 @@ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif #define ALWAYS_INLINE inline __attribute__((always_inline)) @@ -3494,4 +3497,8 @@ _gcry_aes_aesni_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak, _gcry_aes_aesni_xts_dec(ctx, tweak, outbuf, inbuf, nblocks); } +#if __clang__ +# pragma clang attribute pop +#endif + #endif /* USE_AESNI */ diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c index fa481bb4f..0c1ae6e6e 100644 --- a/cipher/rijndael-ssse3-amd64.c +++ b/cipher/rijndael-ssse3-amd64.c @@ -55,6 +55,9 @@ /* Prevent compiler from issuing SSE instructions between asm blocks. 
*/ # pragma GCC target("no-sse") #endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif /* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l @@ -726,4 +729,8 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, return 0; } +#if __clang__ +# pragma clang attribute pop +#endif + #endif /* USE_SSSE3 */ diff --git a/cipher/sha1-intel-shaext.c b/cipher/sha1-intel-shaext.c index 5a2349e1e..d7e3d4f8e 100644 --- a/cipher/sha1-intel-shaext.c +++ b/cipher/sha1-intel-shaext.c @@ -29,6 +29,9 @@ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif /* Two macros to be called prior and after the use of SHA-EXT instructions. There should be no external function calls between @@ -278,4 +281,8 @@ _gcry_sha1_transform_intel_shaext(void *state, const unsigned char *data, return 0; } +#if __clang__ +# pragma clang attribute pop +#endif + #endif /* HAVE_GCC_INLINE_ASM_SHA_EXT */ diff --git a/cipher/sha256-intel-shaext.c b/cipher/sha256-intel-shaext.c index 0c107bb4c..2eda42d8d 100644 --- a/cipher/sha256-intel-shaext.c +++ b/cipher/sha256-intel-shaext.c @@ -29,6 +29,9 @@ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif /* Two macros to be called prior and after the use of SHA-EXT instructions. 
There should be no external function calls between @@ -349,4 +352,8 @@ _gcry_sha256_transform_intel_shaext(u32 state[8], const unsigned char *data, return 0; } +#if __clang__ +# pragma clang attribute pop +#endif + #endif /* HAVE_GCC_INLINE_ASM_SHA_EXT */ From garcia.yann at gmail.com Mon Nov 12 13:24:01 2018 From: garcia.yann at gmail.com (Yann Garcia) Date: Mon, 12 Nov 2018 13:24:01 +0100 Subject: Libgcrypt: How to implement ECIES encryption (ECC + AES) Message-ID: Dear All, I want to implement ECIES encryption. To do it, I have to 1) generate ECDH secret key (the key agreement) and then 2) derive it using Key Derivation Function 2 (KDF2). 1) For the key agreement, I need to generate a random number. Can you indicate me the best way to generate this random number because in the help, clause 9.2 Retrieving random numbers, I found 3 different methods and I don't know which one is the best for my purpose? 2) Is it correct to use the function gcry_kdf_derive() with GCRY_KDF_PBKDF2 flag to do the Key Derivation Function 2? 3) Does the libgcrypt provides samples for ECIES encryption? Many thanks in advance for your help, Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)7 61 00 77 05 Email: *yann.garcia at fscom.fr* Yann.Garcia_EXT at etsi.org Skype: yann.garcia Google+: garcia.yann at gmail.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From garcia.yann at gmail.com Mon Nov 12 14:00:26 2018 From: garcia.yann at gmail.com (Yann Garcia) Date: Mon, 12 Nov 2018 14:00:26 +0100 Subject: Fwd: Libgcrypt: How to implement ECIES encryption (ECC + AES) In-Reply-To: References: Message-ID: Hello Stephen, Thanks a lot for your response. Does it mean that RFC 2898. is not supported? 
Thanks, Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)7 61 00 77 05 Email: *yann.garcia at fscom.fr* Yann.Garcia_EXT at etsi.org Skype: yann.garcia Google+: garcia.yann at gmail.com ---------- Forwarded message --------- From: Yann Garcia Date: Mon, 12 Nov 2018 at 13:24 Subject: Libgcrypt: How to implement ECIES encryption (ECC + AES) To: Dear All, I want to implement ECIES encryption. To do it, I have to 1) generate ECDH secret key (the key agreement) and then 2) derive it using Key Derivation Function 2 (KDF2). 1) For the key agreement, I need to generate a random number. Can you indicate me the best way to generate this random number because in the help, clause 9.2 Retrieving random numbers, I found 3 different methods and I don't know which one is the best for my purpose? 2) Is it correct to use the function gcry_kdf_derive() with GCRY_KDF_PBKDF2 flag to do the Key Derivation Function 2? 3) Does the libgcrypt provides samples for ECIES encryption? Many thanks in advance for your help, Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)7 61 00 77 05 Email: *yann.garcia at fscom.fr* Yann.Garcia_EXT at etsi.org Skype: yann.garcia Google+: garcia.yann at gmail.com -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From jussi.kivilinna at iki.fi Sun Nov 11 14:41:09 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 11 Nov 2018 15:41:09 +0200 Subject: [PATCH] Optimizations for AES-NI OCB Message-ID: <154194366965.30500.17502266982740083773.stgit@localhost.localdomain> * cipher/cipher-ocb.c (ocb_crypt): Process input in 24KiB chunks for better cache locality for checksumming. * cipher/rijndael-aesni.c (ALWAYS_INLINE): New macro for always inlining functions, change all functions with 'inline' to use ALWAYS_INLINE. (NO_INLINE): New macro. (aesni_prepare_2_6_variable, aesni_prepare_7_15_variable): Rename to... (aesni_prepare_2_7_variable, aesni_prepare_8_15_variable): ...these and adjust accordingly (xmm7 moved from *_7_15 to *_2_7). (aesni_prepare_2_6, aesni_prepare_7_15): Rename to... (aesni_prepare_2_7, aesni_prepare_8_15): ...these and adjust accordingly. (aesni_cleanup_2_6, aesni_cleanup_7_15): Rename to... (aesni_cleanup_2_7, aesni_cleanup_8_15): ...these and adjust accordingly. (aesni_ocb_checksum): New. (aesni_ocb_enc, aesni_ocb_dec): Calculate OCB offsets in parallel with help of precalculated offsets L0+L1 ja L0+L1+L0; Do checksum calculation as separate pass instead of inline; Use NO_INLINE. * cipher/rijndael-internal.h (RIJNDAEL_context_s) [USE_AESNI]: Add 'use_avx2'. * cipher/rijndael.c (do_setkey) [USE_AESNI]: Set 'use_avx2' if Intel AVX2 HW feature is available. * tests/basic.c (do_check_ocb_cipher): New test vector; increase size of temporary buffers for new test vector. (check_ocb_cipher_largebuf_split): Make test plaintext non-uniform for better checksum testing. (check_ocb_cipher_checksum): New. (check_ocb_cipher_largebuf): Call check_ocb_cipher_checksum. (check_ocb_cipher): New expected tags for check_ocb_cipher_largebuf test runs. 
-- Benchmark on Haswell i7-4970k @ 4.0Ghz: Before: AES | nanosecs/byte mebibytes/sec cycles/byte OCB enc | 0.175 ns/B 5436 MiB/s 0.702 c/B OCB dec | 0.184 ns/B 5184 MiB/s 0.736 c/B After (enc +2% faster, dec +7% faster): OCB enc | 0.172 ns/B 5557 MiB/s 0.686 c/B OCB dec | 0.171 ns/B 5572 MiB/s 0.685 c/B Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c index f71520ad2..cb6afd2b5 100644 --- a/cipher/cipher-ocb.c +++ b/cipher/cipher-ocb.c @@ -519,6 +519,12 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt, nblks = nblks < nmaxblks ? nblks : nmaxblks; + /* Since checksum xoring is done before/after encryption/decryption, + process input in 24KiB chunks to keep data loaded in L1 cache for + checksumming. */ + if (nblks > 24 * 1024 / OCB_BLOCK_LEN) + nblks = 24 * 1024 / OCB_BLOCK_LEN; + /* Use a bulk method if available. */ if (nblks && c->bulk.ocb_crypt) { diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c index d190c0ac4..081bf124c 100644 --- a/cipher/rijndael-aesni.c +++ b/cipher/rijndael-aesni.c @@ -41,6 +41,10 @@ #endif +#define ALWAYS_INLINE inline __attribute__((always_inline)) +#define NO_INLINE __attribute__((noinline)) + + typedef struct u128_s { u32 a, b, c, d; @@ -49,7 +53,7 @@ typedef struct u128_s /* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l because of 'pragma target'. */ -static inline const unsigned char * +static ALWAYS_INLINE const unsigned char * aes_ocb_get_l (gcry_cipher_hd_t c, u64 n) { unsigned long ntz; @@ -71,78 +75,78 @@ aes_ocb_get_l (gcry_cipher_hd_t c, u64 n) the key or the data. */ #ifdef __WIN64__ /* XMM6-XMM15 are callee-saved registers on WIN64. 
*/ -# define aesni_prepare_2_6_variable char win64tmp[16] -# define aesni_prepare_7_15_variable char win64tmp7_15[16 * 9] +# define aesni_prepare_2_7_variable char win64tmp[16 * 2] +# define aesni_prepare_8_15_variable char win64tmp8_15[16 * 8] # define aesni_prepare() do { } while (0) -# define aesni_prepare_2_6() \ +# define aesni_prepare_2_7() \ do { asm volatile ("movdqu %%xmm6, %0\n\t" \ - : "=m" (*win64tmp) \ + "movdqu %%xmm7, %1\n\t" \ + : "=m" (*win64tmp), "=m" (*(win64tmp+16)) \ : \ : "memory"); \ } while (0) -# define aesni_prepare_7_15() \ - do { asm volatile ("movdqu %%xmm7, 0*16(%0)\n\t" \ - "movdqu %%xmm8, 1*16(%0)\n\t" \ - "movdqu %%xmm9, 2*16(%0)\n\t" \ - "movdqu %%xmm10, 3*16(%0)\n\t" \ - "movdqu %%xmm11, 4*16(%0)\n\t" \ - "movdqu %%xmm12, 5*16(%0)\n\t" \ - "movdqu %%xmm13, 6*16(%0)\n\t" \ - "movdqu %%xmm14, 7*16(%0)\n\t" \ - "movdqu %%xmm15, 8*16(%0)\n\t" \ +# define aesni_prepare_8_15() \ + do { asm volatile ("movdqu %%xmm8, 0*16(%0)\n\t" \ + "movdqu %%xmm9, 1*16(%0)\n\t" \ + "movdqu %%xmm10, 2*16(%0)\n\t" \ + "movdqu %%xmm11, 3*16(%0)\n\t" \ + "movdqu %%xmm12, 4*16(%0)\n\t" \ + "movdqu %%xmm13, 5*16(%0)\n\t" \ + "movdqu %%xmm14, 6*16(%0)\n\t" \ + "movdqu %%xmm15, 7*16(%0)\n\t" \ : \ - : "r" (win64tmp7_15) \ + : "r" (win64tmp8_15) \ : "memory"); \ } while (0) # define aesni_cleanup() \ do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ "pxor %%xmm1, %%xmm1\n" :: ); \ } while (0) -# define aesni_cleanup_2_6() \ +# define aesni_cleanup_2_7() \ do { asm volatile ("movdqu %0, %%xmm6\n\t" \ + "movdqu %1, %%xmm7\n\t" \ "pxor %%xmm2, %%xmm2\n" \ "pxor %%xmm3, %%xmm3\n" \ "pxor %%xmm4, %%xmm4\n" \ "pxor %%xmm5, %%xmm5\n" \ : \ - : "m" (*win64tmp) \ + : "m" (*win64tmp), "m" (*(win64tmp+16)) \ : "memory"); \ } while (0) -# define aesni_cleanup_7_15() \ - do { asm volatile ("movdqu 0*16(%0), %%xmm7\n\t" \ - "movdqu 1*16(%0), %%xmm8\n\t" \ - "movdqu 2*16(%0), %%xmm9\n\t" \ - "movdqu 3*16(%0), %%xmm10\n\t" \ - "movdqu 4*16(%0), %%xmm11\n\t" \ - "movdqu 5*16(%0), 
%%xmm12\n\t" \ - "movdqu 6*16(%0), %%xmm13\n\t" \ - "movdqu 7*16(%0), %%xmm14\n\t" \ - "movdqu 8*16(%0), %%xmm15\n\t" \ +# define aesni_cleanup_8_15() \ + do { asm volatile ("movdqu 0*16(%0), %%xmm8\n\t" \ + "movdqu 1*16(%0), %%xmm9\n\t" \ + "movdqu 2*16(%0), %%xmm10\n\t" \ + "movdqu 3*16(%0), %%xmm11\n\t" \ + "movdqu 4*16(%0), %%xmm12\n\t" \ + "movdqu 5*16(%0), %%xmm13\n\t" \ + "movdqu 6*16(%0), %%xmm14\n\t" \ + "movdqu 7*16(%0), %%xmm15\n\t" \ : \ - : "r" (win64tmp7_15) \ + : "r" (win64tmp8_15) \ : "memory"); \ } while (0) #else -# define aesni_prepare_2_6_variable +# define aesni_prepare_2_7_variable # define aesni_prepare() do { } while (0) -# define aesni_prepare_2_6() do { } while (0) +# define aesni_prepare_2_7() do { } while (0) # define aesni_cleanup() \ do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ "pxor %%xmm1, %%xmm1\n" :: ); \ } while (0) -# define aesni_cleanup_2_6() \ - do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \ +# define aesni_cleanup_2_7() \ + do { asm volatile ("pxor %%xmm7, %%xmm7\n\t" \ + "pxor %%xmm2, %%xmm2\n\t" \ "pxor %%xmm3, %%xmm3\n" \ "pxor %%xmm4, %%xmm4\n" \ "pxor %%xmm5, %%xmm5\n" \ "pxor %%xmm6, %%xmm6\n":: ); \ } while (0) # ifdef __x86_64__ -# define aesni_prepare_7_15_variable -# define aesni_prepare_7_15() do { } while (0) -# define aesni_cleanup_7_15() \ - do { asm volatile ("pxor %%xmm7, %%xmm7\n\t" \ - "pxor %%xmm8, %%xmm8\n" \ +# define aesni_prepare_8_15_variable +# define aesni_prepare_8_15() do { } while (0) +# define aesni_cleanup_8_15() \ + do { asm volatile ("pxor %%xmm8, %%xmm8\n" \ "pxor %%xmm9, %%xmm9\n" \ "pxor %%xmm10, %%xmm10\n" \ "pxor %%xmm11, %%xmm11\n" \ @@ -157,10 +161,10 @@ aes_ocb_get_l (gcry_cipher_hd_t c, u64 n) void _gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key) { - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare(); - aesni_prepare_2_6(); + aesni_prepare_2_7(); if (ctx->rounds < 12) { @@ -383,12 +387,12 @@ _gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, 
const byte *key) } aesni_cleanup(); - aesni_cleanup_2_6(); + aesni_cleanup_2_7(); } /* Make a decryption key from an encryption key. */ -static inline void +static ALWAYS_INLINE void do_aesni_prepare_decryption (RIJNDAEL_context *ctx) { /* The AES-NI decrypt instructions use the Equivalent Inverse @@ -447,7 +451,7 @@ _gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx) /* Encrypt one block using the Intel AES-NI instructions. Block is input * and output through SSE register xmm0. */ -static inline void +static ALWAYS_INLINE void do_aesni_enc (const RIJNDAEL_context *ctx) { #define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" @@ -500,7 +504,7 @@ do_aesni_enc (const RIJNDAEL_context *ctx) /* Decrypt one block using the Intel AES-NI instructions. Block is input * and output through SSE register xmm0. */ -static inline void +static ALWAYS_INLINE void do_aesni_dec (const RIJNDAEL_context *ctx) { #define aesdec_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t" @@ -553,7 +557,7 @@ do_aesni_dec (const RIJNDAEL_context *ctx) /* Encrypt four blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4. */ -static inline void +static ALWAYS_INLINE void do_aesni_enc_vec4 (const RIJNDAEL_context *ctx) { #define aesenc_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc8\n\t" @@ -662,7 +666,7 @@ do_aesni_enc_vec4 (const RIJNDAEL_context *ctx) /* Decrypt four blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4. */ -static inline void +static ALWAYS_INLINE void do_aesni_dec_vec4 (const RIJNDAEL_context *ctx) { #define aesdec_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc8\n\t" @@ -773,7 +777,7 @@ do_aesni_dec_vec4 (const RIJNDAEL_context *ctx) /* Encrypt eight blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11. 
*/ -static inline void +static ALWAYS_INLINE void do_aesni_enc_vec8 (const RIJNDAEL_context *ctx) { asm volatile ("movdqa (%[key]), %%xmm0\n\t" @@ -925,7 +929,7 @@ do_aesni_enc_vec8 (const RIJNDAEL_context *ctx) /* Decrypt eight blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11. */ -static inline void +static ALWAYS_INLINE void do_aesni_dec_vec8 (const RIJNDAEL_context *ctx) { asm volatile ("movdqa (%[key]), %%xmm0\n\t" @@ -1757,10 +1761,10 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks, int cbc_mac) { - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6(); + aesni_prepare_2_7(); asm volatile ("movdqu %[iv], %%xmm5\n\t" : /* No output */ @@ -1794,7 +1798,7 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv, : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); } @@ -1805,10 +1809,10 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr, { static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6(); + aesni_prepare_2_7(); asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */ "movdqa %[ctr], %%xmm5\n\t" /* Preload CTR */ @@ -1820,9 +1824,9 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr, #ifdef __x86_64__ if (nblocks >= 8) { - aesni_prepare_7_15_variable; + aesni_prepare_8_15_variable; - aesni_prepare_7_15(); + aesni_prepare_8_15(); for ( ;nblocks >= 8 ; nblocks -= 8 ) { @@ -1831,7 +1835,7 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr, inbuf += 8*BLOCKSIZE; } - aesni_cleanup_7_15(); + aesni_cleanup_8_15(); } #endif @@ -1848,7 +1852,7 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context 
*ctx, unsigned char *ctr, inbuf += BLOCKSIZE; } aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); } @@ -1876,10 +1880,10 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6(); + aesni_prepare_2_7(); asm volatile ("movdqu %[iv], %%xmm6\n\t" : /* No output */ @@ -1891,9 +1895,9 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv, #ifdef __x86_64__ if (nblocks >= 8) { - aesni_prepare_7_15_variable; + aesni_prepare_8_15_variable; - aesni_prepare_7_15(); + aesni_prepare_8_15(); for ( ;nblocks >= 8; nblocks -= 8) { @@ -1953,7 +1957,7 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv, inbuf += 8*BLOCKSIZE; } - aesni_cleanup_7_15(); + aesni_cleanup_8_15(); } #endif @@ -2022,7 +2026,7 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv, : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); } @@ -2031,10 +2035,10 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6(); + aesni_prepare_2_7(); if ( !ctx->decryption_prepared ) { @@ -2051,9 +2055,9 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, #ifdef __x86_64__ if (nblocks >= 8) { - aesni_prepare_7_15_variable; + aesni_prepare_8_15_variable; - aesni_prepare_7_15(); + aesni_prepare_8_15(); for ( ;nblocks >= 8 ; nblocks -= 8 ) { @@ -2113,7 +2117,7 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, inbuf += 8*BLOCKSIZE; } - aesni_cleanup_7_15(); + aesni_cleanup_8_15(); } #endif @@ -2187,11 +2191,119 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, : "memory"); aesni_cleanup (); - aesni_cleanup_2_6 (); + 
aesni_cleanup_2_7 (); } -static void +static ALWAYS_INLINE void +aesni_ocb_checksum (gcry_cipher_hd_t c, const unsigned char *plaintext, + size_t nblocks) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + + /* Calculate checksum */ + asm volatile ("movdqu %[checksum], %%xmm6\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + : + :[checksum] "m" (*c->u_ctr.ctr) + : "memory" ); + +#if defined(HAVE_GCC_INLINE_ASM_AVX2) + if (nblocks >= 16 && ctx->use_avx2) + { + asm volatile ("vzeroupper\n\t" + "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" + "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" + "vpxor %%xmm5, %%xmm5, %%xmm5\n\t" + "vpxor %%xmm7, %%xmm7, %%xmm7\n\t" + : + : + : "memory"); + + for (;nblocks >= 16; nblocks -= 16) + { + asm volatile ("vpxor %[ptr0], %%ymm6, %%ymm6\n\t" + "vpxor %[ptr1], %%ymm1, %%ymm1\n\t" + "vpxor %[ptr2], %%ymm2, %%ymm2\n\t" + "vpxor %[ptr3], %%ymm3, %%ymm3\n\t" + "vpxor %[ptr4], %%ymm0, %%ymm0\n\t" + "vpxor %[ptr5], %%ymm4, %%ymm4\n\t" + "vpxor %[ptr6], %%ymm5, %%ymm5\n\t" + "vpxor %[ptr7], %%ymm7, %%ymm7\n\t" + : + : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)), + [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)), + [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)), + [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)), + [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)), + [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)), + [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)), + [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2)) + : "memory" ); + plaintext += BLOCKSIZE * 16; + } + + asm volatile ("vpxor %%ymm0, %%ymm6, %%ymm6\n\t" + "vpxor %%ymm4, %%ymm1, %%ymm1\n\t" + "vpxor %%ymm5, %%ymm2, %%ymm2\n\t" + "vpxor %%ymm7, %%ymm3, %%ymm3\n\t" + "vextracti128 $1, %%ymm6, %%xmm0\n\t" + "vextracti128 $1, %%ymm1, %%xmm4\n\t" + "vextracti128 $1, %%ymm2, %%xmm5\n\t" + "vextracti128 $1, %%ymm3, %%xmm7\n\t" + "vpxor %%xmm0, %%xmm6, %%xmm6\n\t" + "vpxor %%xmm4, %%xmm1, %%xmm1\n\t" + "vpxor %%xmm5, %%xmm2, %%xmm2\n\t" + "vpxor %%xmm7, %%xmm3, %%xmm3\n\t" + 
"vzeroupper\n\t" + : + : + : "memory" ); + } +#endif + + for (;nblocks >= 4; nblocks -= 4) + { + asm volatile ("movdqu %[ptr0], %%xmm0\n\t" + "movdqu %[ptr1], %%xmm4\n\t" + "movdqu %[ptr2], %%xmm5\n\t" + "movdqu %[ptr3], %%xmm7\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "pxor %%xmm4, %%xmm1\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "pxor %%xmm7, %%xmm3\n\t" + : + : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE)), + [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE)), + [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE)), + [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE)) + : "memory" ); + plaintext += BLOCKSIZE * 4; + } + + for (;nblocks >= 1; nblocks -= 1) + { + asm volatile ("movdqu %[ptr0], %%xmm0\n\t" + "pxor %%xmm0, %%xmm6\n\t" + : + : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE)) + : "memory" ); + plaintext += BLOCKSIZE; + } + + asm volatile ("pxor %%xmm1, %%xmm6\n\t" + "pxor %%xmm2, %%xmm6\n\t" + "pxor %%xmm3, %%xmm6\n\t" + "movdqu %%xmm6, %[checksum]\n\t" + : [checksum] "=m" (*c->u_ctr.ctr) + : + : "memory" ); +} + + +static unsigned int NO_INLINE aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { @@ -2200,31 +2312,35 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, const unsigned char *inbuf = inbuf_arg; u64 n = c->u_mode.ocb.data_nblocks; const unsigned char *l; - aesni_prepare_2_6_variable; + byte tempbuf[16 * 2 + 15]; + byte *l0l1; + byte *l0l1l0; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6 (); + aesni_prepare_2_7 (); + + aesni_ocb_checksum (c, inbuf_arg, nblocks); - /* Preload Offset and Checksum */ + asm volatile ("" : "=r" (l0l1) : "0" (tempbuf) : "memory"); + l0l1 = l0l1 + (-(uintptr_t)l0l1 & 15); + l0l1l0 = l0l1 + 16; + + /* Preload Offset */ asm volatile ("movdqu %[iv], %%xmm5\n\t" - "movdqu %[ctr], %%xmm6\n\t" : /* No output */ - : [iv] "m" (*c->u_iv.iv), - [ctr] "m" (*c->u_ctr.ctr) + : [iv] "m" (*c->u_iv.iv) : "memory" ); - for ( ;nblocks && n % 4; nblocks-- ) { l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor 
L_{ntz(i)} */ - /* Checksum_i = Checksum_{i-1} xor P_i */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" - "pxor %%xmm0, %%xmm6\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), @@ -2243,95 +2359,103 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, outbuf += BLOCKSIZE; } + asm volatile ("movdqu %[l0], %%xmm6\n\t" + "movdqu %[l1], %%xmm0\n\t" + "pxor %%xmm6, %%xmm0\n\t" + "movdqa %%xmm0, %[l0l1]\n\t" + "pxor %%xmm6, %%xmm0\n\t" + "movdqa %%xmm0, %[l0l1l0]\n\t" + : [l0l1] "=m" (*l0l1), + [l0l1l0] "=m" (*l0l1l0) + : [l0] "m" (*c->u_mode.ocb.L[0]), + [l1] "m" (*c->u_mode.ocb.L[1]) + : "memory" ); + #ifdef __x86_64__ if (nblocks >= 8) { - aesni_prepare_7_15_variable; - - aesni_prepare_7_15(); + aesni_prepare_8_15_variable; - asm volatile ("movdqu %[l0], %%xmm7\n\t" - : - : [l0] "m" (*c->u_mode.ocb.L[0]) - : "memory" ); + aesni_prepare_8_15(); for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; l = aes_ocb_get_l(c, n); - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - /* Checksum_i = Checksum_{i-1} xor P_i */ - /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - - asm volatile ("movdqu %[l1], %%xmm10\n\t" - "movdqu %[inbuf0], %%xmm1\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm1, %%xmm6\n\t" - "pxor %%xmm5, %%xmm1\n\t" - "movdqa %%xmm5, %%xmm12\n\t" + asm volatile ("movdqa %[l0l1], %%xmm10\n\t" + "movdqa %[l0l1l0], %%xmm11\n\t" + "movdqu %[l3], %%xmm15\n\t" : - : [l1] "m" (*c->u_mode.ocb.L[1]), - [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) + : [l0l1] "m" (*l0l1), + [l0l1l0] "m" (*l0l1l0), + [l3] "m" (*l) : "memory" ); - asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" - "pxor %%xmm10, %%xmm5\n\t" - "pxor %%xmm2, %%xmm6\n\t" - "pxor %%xmm5, %%xmm2\n\t" - "movdqa %%xmm5, %%xmm13\n\t" + + n += 4; + l = aes_ocb_get_l(c, n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor ENCIPHER(K, C_i xor Offset_i) */ + + asm volatile ("movdqu %[inbuf0], 
%%xmm1\n\t" + "movdqu %[inbuf1], %%xmm2\n\t" + "movdqu %[inbuf2], %%xmm3\n\t" : - : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + : [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)), + [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)), + [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm3, %%xmm6\n\t" - "pxor %%xmm5, %%xmm3\n\t" - "movdqa %%xmm5, %%xmm14\n\t" + asm volatile ("movdqu %[inbuf3], %%xmm4\n\t" + "movdqu %[inbuf4], %%xmm8\n\t" + "movdqu %[inbuf5], %%xmm9\n\t" : - : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) + : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)), + [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)), + [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l3], %%xmm15\n\t" - "movdqu %[inbuf3], %%xmm4\n\t" + asm volatile ("movdqa %%xmm6, %%xmm12\n\t" + "pxor %%xmm5, %%xmm12\n\t" + "pxor %%xmm12, %%xmm1\n\t" + + "movdqa %%xmm10, %%xmm13\n\t" + "pxor %%xmm5, %%xmm13\n\t" + "pxor %%xmm13, %%xmm2\n\t" + + "movdqa %%xmm11, %%xmm14\n\t" + "pxor %%xmm5, %%xmm14\n\t" + "pxor %%xmm14, %%xmm3\n\t" + + "pxor %%xmm11, %%xmm5\n\t" "pxor %%xmm15, %%xmm5\n\t" - "pxor %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqa %%xmm5, %%xmm15\n\t" - : - : [l3] "m" (*l), - [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) - : "memory" ); - n += 4; - l = aes_ocb_get_l(c, n); - - asm volatile ("movdqu %[inbuf4], %%xmm8\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm8, %%xmm6\n\t" - "pxor %%xmm5, %%xmm8\n\t" - "movdqu %%xmm5, %[outbuf4]\n\t" - : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)) - : [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)) - : "memory" ); - asm volatile ("movdqu %[inbuf5], %%xmm9\n\t" - "pxor %%xmm10, %%xmm5\n\t" - "pxor %%xmm9, %%xmm6\n\t" - "pxor %%xmm5, %%xmm9\n\t" - "movdqu %%xmm5, %[outbuf5]\n\t" - : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)) - : [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE)) + "movdqa %%xmm5, %%xmm0\n\t" + "pxor %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm8\n\t" + "movdqu %%xmm0, 
%[outbuf4]\n\t" + + "movdqa %%xmm10, %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm9\n\t" + "movdqu %%xmm0, %[outbuf5]\n\t" + : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)), + [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)) + : : "memory" ); asm volatile ("movdqu %[inbuf6], %%xmm10\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm10, %%xmm6\n\t" - "pxor %%xmm5, %%xmm10\n\t" - "movdqu %%xmm5, %[outbuf6]\n\t" + "movdqa %%xmm11, %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm10\n\t" + "movdqu %%xmm0, %[outbuf6]\n\t" : [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)) : [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l7], %%xmm11\n\t" + asm volatile ("movdqu %[l7], %%xmm0\n\t" "pxor %%xmm11, %%xmm5\n\t" + "pxor %%xmm0, %%xmm5\n\t" "movdqu %[inbuf7], %%xmm11\n\t" - "pxor %%xmm11, %%xmm6\n\t" "pxor %%xmm5, %%xmm11\n\t" : : [l7] "m" (*l), @@ -2374,7 +2498,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, inbuf += 8*BLOCKSIZE; } - aesni_cleanup_7_15(); + aesni_cleanup_8_15(); } #endif @@ -2384,44 +2508,47 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - /* Checksum_i = Checksum_{i-1} xor P_i */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - asm volatile ("movdqu %[l0], %%xmm4\n\t" + + asm volatile ("movdqu %[l0], %%xmm0\n\t" "movdqu %[inbuf0], %%xmm1\n\t" - "pxor %%xmm4, %%xmm5\n\t" - "pxor %%xmm1, %%xmm6\n\t" - "pxor %%xmm5, %%xmm1\n\t" - "movdqu %%xmm5, %[outbuf0]\n\t" - : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) + "movdqa %[l0l1], %%xmm3\n\t" + : : [l0] "m" (*c->u_mode.ocb.L[0]), + [l0l1] "m" (*l0l1), [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l1], %%xmm0\n\t" - "movdqu %[inbuf1], %%xmm2\n\t" - "pxor %%xmm0, %%xmm5\n\t" - "pxor %%xmm2, %%xmm6\n\t" - "pxor %%xmm5, %%xmm2\n\t" - "movdqu %%xmm5, %[outbuf1]\n\t" - : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) - : [l1] "m" (*c->u_mode.ocb.L[1]), - 
[inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + asm volatile ("movdqa %[l0l1l0], %%xmm4\n\t" + "movdqu %[l3], %%xmm6\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" + "movdqu %%xmm0, %[outbuf0]\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) + : [l0l1l0] "m" (*l0l1l0), + [l3] "m" (*l) : "memory" ); - asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" - "pxor %%xmm4, %%xmm5\n\t" - "pxor %%xmm3, %%xmm6\n\t" + asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" "pxor %%xmm5, %%xmm3\n\t" - "movdqu %%xmm5, %[outbuf2]\n\t" + "pxor %%xmm3, %%xmm2\n\t" + "movdqu %%xmm3, %[outbuf1]\n\t" + : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) + : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqa %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf2], %%xmm3\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm3\n\t" + "movdqu %%xmm0, %[outbuf2]\n\t" : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)) - : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) + : + [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l3], %%xmm4\n\t" + asm volatile ("pxor %%xmm6, %%xmm5\n\t" "pxor %%xmm4, %%xmm5\n\t" "movdqu %[inbuf3], %%xmm4\n\t" - "pxor %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm4\n\t" : - : [l3] "m" (*l), - [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) + : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) : "memory" ); do_aesni_enc_vec4 (ctx); @@ -2453,12 +2580,10 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - /* Checksum_i = Checksum_{i-1} xor P_i */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" - "pxor %%xmm0, %%xmm6\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), @@ -2479,30 +2604,41 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, c->u_mode.ocb.data_nblocks = n; asm volatile ("movdqu %%xmm5, %[iv]\n\t" - "movdqu %%xmm6, %[ctr]\n\t" - : [iv] "=m" (*c->u_iv.iv), - [ctr] "=m" 
(*c->u_ctr.ctr) + : [iv] "=m" (*c->u_iv.iv) : : "memory" ); + asm volatile ("pxor %%xmm0, %%xmm0\n\t" + "movdqa %%xmm0, %[l0l1]\n\t" + "movdqa %%xmm0, %[l0l1l0]\n\t" + : [l0l1] "=m" (*l0l1), + [l0l1l0] "=m" (*l0l1l0) + : + : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); + + return 0; } -static void +static unsigned int NO_INLINE aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, - const void *inbuf_arg, size_t nblocks) + const void *inbuf_arg, size_t nblocks_arg) { RIJNDAEL_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; u64 n = c->u_mode.ocb.data_nblocks; const unsigned char *l; - aesni_prepare_2_6_variable; + size_t nblocks = nblocks_arg; + byte tempbuf[16 * 2 + 15]; + byte *l0l1; + byte *l0l1l0; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6 (); + aesni_prepare_2_7 (); if ( !ctx->decryption_prepared ) { @@ -2510,12 +2646,14 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, ctx->decryption_prepared = 1; } - /* Preload Offset and Checksum */ + asm volatile ("" : "=r" (l0l1) : "0" (tempbuf) : "memory"); + l0l1 = l0l1 + (-(uintptr_t)l0l1 & 15); + l0l1l0 = l0l1 + 16; + + /* Preload Offset */ asm volatile ("movdqu %[iv], %%xmm5\n\t" - "movdqu %[ctr], %%xmm6\n\t" : /* No output */ - : [iv] "m" (*c->u_iv.iv), - [ctr] "m" (*c->u_ctr.ctr) + : [iv] "m" (*c->u_iv.iv) : "memory" ); for ( ;nblocks && n % 4; nblocks-- ) @@ -2524,7 +2662,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ - /* Checksum_i = Checksum_{i-1} xor P_i */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" @@ -2537,7 +2674,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, do_aesni_dec (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" - "pxor %%xmm0, %%xmm6\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : @@ 
-2547,87 +2683,103 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, outbuf += BLOCKSIZE; } + asm volatile ("movdqu %[l0], %%xmm6\n\t" + "movdqu %[l1], %%xmm0\n\t" + "pxor %%xmm6, %%xmm0\n\t" + "movdqa %%xmm0, %[l0l1]\n\t" + "pxor %%xmm6, %%xmm0\n\t" + "movdqa %%xmm0, %[l0l1l0]\n\t" + : [l0l1] "=m" (*l0l1), + [l0l1l0] "=m" (*l0l1l0) + : [l0] "m" (*c->u_mode.ocb.L[0]), + [l1] "m" (*c->u_mode.ocb.L[1]) + : "memory" ); + #ifdef __x86_64__ if (nblocks >= 8) { - aesni_prepare_7_15_variable; - - aesni_prepare_7_15(); + aesni_prepare_8_15_variable; - asm volatile ("movdqu %[l0], %%xmm7\n\t" - : - : [l0] "m" (*c->u_mode.ocb.L[0]) - : "memory" ); + aesni_prepare_8_15(); for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; l = aes_ocb_get_l(c, n); + asm volatile ("movdqa %[l0l1], %%xmm10\n\t" + "movdqa %[l0l1l0], %%xmm11\n\t" + "movdqu %[l3], %%xmm15\n\t" + : + : [l0l1] "m" (*l0l1), + [l0l1l0] "m" (*l0l1l0), + [l3] "m" (*l) + : "memory" ); + + n += 4; + l = aes_ocb_get_l(c, n); + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ - /* Checksum_i = Checksum_{i-1} xor P_i */ - asm volatile ("movdqu %[l1], %%xmm10\n\t" - "movdqu %[inbuf0], %%xmm1\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm5, %%xmm1\n\t" - "movdqa %%xmm5, %%xmm12\n\t" + asm volatile ("movdqu %[inbuf0], %%xmm1\n\t" + "movdqu %[inbuf1], %%xmm2\n\t" + "movdqu %[inbuf2], %%xmm3\n\t" : - : [l1] "m" (*c->u_mode.ocb.L[1]), - [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) + : [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)), + [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)), + [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" - "pxor %%xmm10, %%xmm5\n\t" - "pxor %%xmm5, %%xmm2\n\t" - "movdqa %%xmm5, %%xmm13\n\t" - : - : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) - : "memory" ); - asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm5, %%xmm3\n\t" - "movdqa %%xmm5, %%xmm14\n\t" + asm volatile ("movdqu %[inbuf3], 
%%xmm4\n\t" + "movdqu %[inbuf4], %%xmm8\n\t" + "movdqu %[inbuf5], %%xmm9\n\t" : - : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) + : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)), + [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)), + [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l3], %%xmm0\n\t" - "movdqu %[inbuf3], %%xmm4\n\t" - "pxor %%xmm0, %%xmm5\n\t" + asm volatile ("movdqa %%xmm6, %%xmm12\n\t" + "pxor %%xmm5, %%xmm12\n\t" + "pxor %%xmm12, %%xmm1\n\t" + + "movdqa %%xmm10, %%xmm13\n\t" + "pxor %%xmm5, %%xmm13\n\t" + "pxor %%xmm13, %%xmm2\n\t" + + "movdqa %%xmm11, %%xmm14\n\t" + "pxor %%xmm5, %%xmm14\n\t" + "pxor %%xmm14, %%xmm3\n\t" + + "pxor %%xmm11, %%xmm5\n\t" + "pxor %%xmm15, %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqa %%xmm5, %%xmm15\n\t" - : - : [l3] "m" (*l), - [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) - : "memory" ); - n += 4; - l = aes_ocb_get_l(c, n); - - asm volatile ("movdqu %[inbuf4], %%xmm8\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm5, %%xmm8\n\t" - "movdqu %%xmm5, %[outbuf4]\n\t" - : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)) - : [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)) - : "memory" ); - asm volatile ("movdqu %[inbuf5], %%xmm9\n\t" - "pxor %%xmm10, %%xmm5\n\t" - "pxor %%xmm5, %%xmm9\n\t" - "movdqu %%xmm5, %[outbuf5]\n\t" - : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)) - : [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE)) + "movdqa %%xmm5, %%xmm0\n\t" + "pxor %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm8\n\t" + "movdqu %%xmm0, %[outbuf4]\n\t" + + "movdqa %%xmm10, %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm9\n\t" + "movdqu %%xmm0, %[outbuf5]\n\t" + : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)), + [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)) + : : "memory" ); asm volatile ("movdqu %[inbuf6], %%xmm10\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm5, %%xmm10\n\t" - "movdqu %%xmm5, %[outbuf6]\n\t" + "movdqa %%xmm11, %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm10\n\t" + "movdqu %%xmm0, %[outbuf6]\n\t" : [outbuf6] "=m" 
(*(outbuf + 6 * BLOCKSIZE)) : [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l7], %%xmm0\n\t" - "movdqu %[inbuf7], %%xmm11\n\t" + "pxor %%xmm11, %%xmm5\n\t" "pxor %%xmm0, %%xmm5\n\t" + "movdqu %[inbuf7], %%xmm11\n\t" "pxor %%xmm5, %%xmm11\n\t" : : [l7] "m" (*l), @@ -2655,14 +2807,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, "movdqu %%xmm9, %[outbuf5]\n\t" "movdqu %%xmm10, %[outbuf6]\n\t" "movdqu %%xmm11, %[outbuf7]\n\t" - "pxor %%xmm2, %%xmm1\n\t" - "pxor %%xmm4, %%xmm1\n\t" - "pxor %%xmm9, %%xmm1\n\t" - "pxor %%xmm11, %%xmm1\n\t" - "pxor %%xmm3, %%xmm6\n\t" - "pxor %%xmm8, %%xmm6\n\t" - "pxor %%xmm10, %%xmm6\n\t" - "pxor %%xmm1, %%xmm6\n\t" : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)), [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), @@ -2678,7 +2822,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, inbuf += 8*BLOCKSIZE; } - aesni_cleanup_7_15(); + aesni_cleanup_8_15(); } #endif @@ -2688,40 +2832,47 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ - /* Checksum_i = Checksum_{i-1} xor P_i */ - asm volatile ("movdqu %[l0], %%xmm4\n\t" + /* C_i = Offset_i xor DECIPHER(K, P_i xor Offset_i) */ + + asm volatile ("movdqu %[l0], %%xmm0\n\t" "movdqu %[inbuf0], %%xmm1\n\t" - "pxor %%xmm4, %%xmm5\n\t" - "pxor %%xmm5, %%xmm1\n\t" - "movdqu %%xmm5, %[outbuf0]\n\t" - : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) + "movdqa %[l0l1], %%xmm3\n\t" + : : [l0] "m" (*c->u_mode.ocb.L[0]), + [l0l1] "m" (*l0l1), [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l1], %%xmm0\n\t" - "movdqu %[inbuf1], %%xmm2\n\t" - "pxor %%xmm0, %%xmm5\n\t" - "pxor %%xmm5, %%xmm2\n\t" - "movdqu %%xmm5, %[outbuf1]\n\t" - : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) - : [l1] "m" (*c->u_mode.ocb.L[1]), - [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + asm volatile 
("movdqa %[l0l1l0], %%xmm4\n\t" + "movdqu %[l3], %%xmm6\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" + "movdqu %%xmm0, %[outbuf0]\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) + : [l0l1l0] "m" (*l0l1l0), + [l3] "m" (*l) : "memory" ); - asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" - "pxor %%xmm4, %%xmm5\n\t" + asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" "pxor %%xmm5, %%xmm3\n\t" - "movdqu %%xmm5, %[outbuf2]\n\t" + "pxor %%xmm3, %%xmm2\n\t" + "movdqu %%xmm3, %[outbuf1]\n\t" + : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) + : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqa %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf2], %%xmm3\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm3\n\t" + "movdqu %%xmm0, %[outbuf2]\n\t" : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)) - : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) + : + [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l3], %%xmm0\n\t" + asm volatile ("pxor %%xmm6, %%xmm5\n\t" + "pxor %%xmm4, %%xmm5\n\t" "movdqu %[inbuf3], %%xmm4\n\t" - "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" : - : [l3] "m" (*l), - [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) + : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) : "memory" ); do_aesni_dec_vec4 (ctx); @@ -2737,10 +2888,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, "movdqu %%xmm3, %[outbuf2]\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" - "pxor %%xmm1, %%xmm6\n\t" - "pxor %%xmm2, %%xmm6\n\t" - "pxor %%xmm3, %%xmm6\n\t" - "pxor %%xmm4, %%xmm6\n\t" : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)), [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)), [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)), @@ -2771,7 +2918,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, do_aesni_dec (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" - "pxor %%xmm0, %%xmm6\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : @@ -2783,14 +2929,23 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, 
c->u_mode.ocb.data_nblocks = n; asm volatile ("movdqu %%xmm5, %[iv]\n\t" - "movdqu %%xmm6, %[ctr]\n\t" - : [iv] "=m" (*c->u_iv.iv), - [ctr] "=m" (*c->u_ctr.ctr) + : [iv] "=m" (*c->u_iv.iv) : : "memory" ); + aesni_ocb_checksum (c, outbuf_arg, nblocks_arg); + + asm volatile ("pxor %%xmm0, %%xmm0\n\t" + "movdqa %%xmm0, %[l0l1]\n\t" + "movdqa %%xmm0, %[l0l1l0]\n\t" + : [l0l1] "=m" (*l0l1), + [l0l1l0] "=m" (*l0l1l0) + : + : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); + + return 0; } @@ -2799,11 +2954,9 @@ _gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { if (encrypt) - aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); + return aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); else - aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); - - return 0; + return aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); } @@ -2815,10 +2968,10 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, const unsigned char *abuf = abuf_arg; u64 n = c->u_mode.ocb.aad_nblocks; const unsigned char *l; - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6 (); + aesni_prepare_2_7 (); /* Preload Offset and Sum */ asm volatile ("movdqu %[iv], %%xmm5\n\t" @@ -2856,9 +3009,9 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, #ifdef __x86_64__ if (nblocks >= 8) { - aesni_prepare_7_15_variable; + aesni_prepare_8_15_variable; - aesni_prepare_7_15(); + aesni_prepare_8_15(); asm volatile ("movdqu %[l0], %%xmm7\n\t" "movdqu %[l1], %%xmm12\n\t" @@ -2948,7 +3101,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, abuf += 8*BLOCKSIZE; } - aesni_cleanup_7_15(); + aesni_cleanup_8_15(); } #endif @@ -3038,7 +3191,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); return 0; } @@ -3053,10 +3206,10 @@ 
_gcry_aes_aesni_xts_enc (RIJNDAEL_context *ctx, unsigned char *tweak, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6 (); + aesni_prepare_2_7 (); /* Preload Tweak */ asm volatile ("movdqu %[tweak], %%xmm5\n\t" @@ -3182,7 +3335,7 @@ _gcry_aes_aesni_xts_enc (RIJNDAEL_context *ctx, unsigned char *tweak, : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); } @@ -3191,10 +3344,10 @@ _gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6 (); + aesni_prepare_2_7 (); if ( !ctx->decryption_prepared ) { @@ -3326,7 +3479,7 @@ _gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak, : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); } diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h index 160fb8c36..1dcfcd5e4 100644 --- a/cipher/rijndael-internal.h +++ b/cipher/rijndael-internal.h @@ -143,6 +143,7 @@ typedef struct RIJNDAEL_context_s #endif /*USE_PADLOCK*/ #ifdef USE_AESNI unsigned int use_aesni:1; /* AES-NI shall be used. */ + unsigned int use_avx2:1; /* AVX2 shall be used. */ #endif /*USE_AESNI*/ #ifdef USE_SSSE3 unsigned int use_ssse3:1; /* SSSE3 shall be used. 
*/ diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 1bc8b0fc2..e8ec7993b 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -334,6 +334,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_aesni = 1; + ctx->use_avx2 = !!(hwfeatures & HWF_INTEL_AVX2); if (hd) { hd->bulk.cfb_enc = _gcry_aes_aesni_cfb_enc; diff --git a/tests/basic.c b/tests/basic.c index f3d895153..0afae3047 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -4411,11 +4411,114 @@ do_check_ocb_cipher (int inplace) "1792A4E31E0755FB03E31B22116E6C2DDF9EFD6E33D536F1" "A0124B0A55BAE884ED93481529C76B6AD0C515F4D1CDD4FD" "AC4F02AA" + }, + { GCRY_CIPHER_AES, 12, "0F0E0D0C0B0A09080706050403020100", + "BBAA9988776655443322110D", + "000102030405060708090A0B0C0D0E0F1011121314151617" + "18191A1B1C1D1E1F2021222324252627", + /* test vector for checksumming */ + "01000000000000000000000000000000" + "02000000000000000000000000000000" + "04000000000000000000000000000000" + "08000000000000000000000000000000" + "10000000000000000000000000000000" + "20000000000000000000000000000000" + "40000000000000000000000000000000" + "80000000000000000000000000000000" + "00010000000000000000000000000000" + "00020000000000000000000000000000" + "00040000000000000000000000000000" + "00080000000000000000000000000000" + "00100000000000000000000000000000" + "00200000000000000000000000000000" + "00400000000000000000000000000000" + "00800000000000000000000000000000" + "00000100000000000000000000000000" + "00000200000000000000000000000000" + "00000400000000000000000000000000" + "00000800000000000000000000000000" + "00001000000000000000000000000000" + "00002000000000000000000000000000" + "00004000000000000000000000000000" + "00008000000000000000000000000000" + "00000001000000000000000000000000" + "00000002000000000000000000000000" + "00000004000000000000000000000000" + "00000008000000000000000000000000" + "00000010000000000000000000000000" 
+ "00000020000000000000000000000000" + "00000040000000000000000000000000" + "00000080000000000000000000000000" + "00000000010000000000000000000000" + "00000000020000000000000000000000" + "00000000040000000000000000000000" + "00000000080000000000000000000000" + "00000000100000000000000000000000" + "00000000200000000000000000000000" + "00000000400000000000000000000000" + "00000000800000000000000000000000" + "00000000000100000000000000000000" + "00000000000200000000000000000000" + "00000000000400000000000000000000" + "00000000000800000000000000000000" + "00000000001000000000000000000000" + "00000000002000000000000000000000" + "00000000004000000000000000000000" + "00000000008000000000000000000000", + "01105c6e36f6ac480f022c51e31ed702" + "90fda4b7b783194d4b4be8e4e1e2dff4" + "6a0804d1c5f9f808ea7933e31c063233" + "2bf65a22b20bb13cde3b80b3682ba965" + "b1207c58916f7856fa9968b410e50dee" + "98b35c071163d1b352b9bbccd09fde29" + "b850f40e71a8ae7d2e2d577f5ee39c46" + "7fa28130b50a123c29958e4665dda9a5" + "e0793997f8f19633a96392141d6e0e88" + "77850ed4364065d1d2f8746e2f1d5fd1" + "996cdde03215306503a30e41f58ef3c4" + "400365cfea4fa6381157c12a46598edf" + "18604854462ec66e3d3cf26d4723cb6a" + "9d801095048086a606fdb9192760889b" + "a8ce2e70e1b55a469137a9e2e6734565" + "283cb1e2c74f37e0854d03e33f8ba499" + "ef5d9af4edfce077c6280338f0a64286" + "2e6bc27ebd5a4c91b3778e22631251c8" + "c5bb75a10945597a9d6c274fc82d3338" + "b403a0a549d1375f26e71ef22bce0941" + "93ea87e2ed72fce0546148c351eec3be" + "867bb1b96070c377fff3c98e21562beb" + "475cfe28abcaaedf49981f6599b15140" + "ea6130d24407079f18ba9d4a8960b082" + "b39c57320e2e064f02fde88c23112146" + "1cac3655868aef584714826ee4f361fb" + "e6d692e1589cbb9dd3c74fa628df2a1f" + "3b0029b1d62b7e9978013ed3c793c1dd" + "1f184c8f7022a853cac40b74ac749aa3" + "f33f0d14732dfda0f2c3c20591bf1f5a" + "710ec0d0bca342baa5146068a78ff58c" + "66316312b7a98af35a0f4e92799b4047" + "f047ae61f25c28d232ce5c168cc745d6" + "6da13cb0f9e38a696635dba7a21571cf" + "cd64ec8cc33db7879f59a90d9edd00f6" 
+ "a899e39ab36b9269a3ac04ebad9326bf" + "53cd9b400168a61714cd628a4056d236" + "bd8622c76daa54cb65f5db2fe03bafbe" + "0b23549ae31136f607293e8093a21934" + "74fd5e9c2451b4c8e0499e6ad34fafc8" + "ab77722a282f7f84b14ddebf7e696300" + "c1ef92d4a0263c6cca104530f996e272" + "f58992ff68d642b071a5848dc4acf2ae" + "28fb1f27ae0f297d5136a7a0a4a03e89" + "b588755b8217a1c62773790e69261269" + "19f45daf7b3ccf18e3fc590a9a0e172f" + "033ac4d13c3decc4c62d7de718ace802" + "140452dc850989f6762e3578bbb04be3" + "1a237c599c4649f4e586b2de" } }; gpg_error_t err = 0; gcry_cipher_hd_t hde, hdd; - unsigned char out[MAX_DATA_LEN]; + unsigned char out[1024]; unsigned char tag[16]; int tidx; @@ -4548,7 +4651,7 @@ do_check_ocb_cipher (int inplace) } else { - err = gcry_cipher_encrypt (hde, out, MAX_DATA_LEN, + err = gcry_cipher_encrypt (hde, out, sizeof(out), plain, plainlen); } } @@ -4605,7 +4708,7 @@ do_check_ocb_cipher (int inplace) } else { - unsigned char tmp[MAX_DATA_LEN]; + unsigned char tmp[sizeof(out)]; memcpy(tmp, out, plainlen); err = gcry_cipher_decrypt (hdd, out, plainlen, tmp, plainlen); @@ -4696,7 +4799,7 @@ check_ocb_cipher_largebuf_split (int algo, int keylen, const char *tagexpect, } for (i = 0; i < buflen; i++) - inbuf[i] = 'a'; + inbuf[i] = (i + 181081) * 5039; err = gcry_cipher_open (&hde, algo, GCRY_CIPHER_MODE_OCB, 0); if (!err) @@ -4854,6 +4957,131 @@ out_free: } +static void +check_ocb_cipher_checksum (int algo, int keylen) +{ + static const unsigned char key[32] = + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"; + static const unsigned char nonce[12] = + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x00\x01\x02\x03"; + const size_t buflen = 128 * 16; + unsigned char *inbuf, *outbuf; + gpg_error_t err = 0; + gcry_cipher_hd_t hde, hde2; + unsigned char tag[16]; + unsigned char tag2[16]; + int i; + + inbuf = xmalloc(buflen); + if (!inbuf) + { + fail ("out-of-memory\n"); + return; + } + outbuf = 
xmalloc(buflen); + if (!inbuf) + { + fail ("out-of-memory\n"); + xfree(inbuf); + return; + } + + memset(inbuf, 0, buflen); + for (i = 0; i < 128; i += 16) + { + unsigned char *blk = inbuf + i; + int bit2set = i / 16; + int byteidx = bit2set / 8; + int bitpos = bit2set % 8; + + blk[byteidx] |= 1 << bitpos; + } + + err = gcry_cipher_open (&hde, algo, GCRY_CIPHER_MODE_OCB, 0); + if (!err) + err = gcry_cipher_open (&hde2, algo, GCRY_CIPHER_MODE_OCB, 0); + if (err) + { + fail ("cipher-ocb, gcry_cipher_open failed (checksum, algo %d): %s\n", + algo, gpg_strerror (err)); + goto out_free; + } + + err = gcry_cipher_setkey (hde, key, keylen); + if (!err) + err = gcry_cipher_setkey (hde2, key, keylen); + if (err) + { + fail ("cipher-ocb, gcry_cipher_setkey failed (checksum, algo %d): %s\n", + algo, gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hde2); + goto out_free; + } + + err = gcry_cipher_setiv (hde, nonce, 12); + if (!err) + err = gcry_cipher_setiv (hde2, nonce, 12); + if (err) + { + fail ("cipher-ocb, gcry_cipher_setiv failed (checksum, algo %d): %s\n", + algo, gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hde2); + goto out_free; + } + + err = gcry_cipher_final (hde); + if (!err) + { + err = gcry_cipher_encrypt (hde, outbuf, buflen, inbuf, buflen); + } + for (i = 0; i < buflen && !err; i += 16) + { + if (i + 16 == buflen) + err = gcry_cipher_final (hde2); + if (!err) + err = gcry_cipher_encrypt (hde2, outbuf + i, 16, inbuf + i, 16); + } + + if (err) + { + fail ("cipher-ocb, gcry_cipher_encrypt failed (checksum, algo %d): %s\n", + algo, gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hde2); + goto out_free; + } + + /* Check that the tag matches. 
*/ + err = gcry_cipher_gettag (hde, tag, 16); + if (err) + { + fail ("cipher_ocb, gcry_cipher_gettag failed (checksum, algo %d): %s\n", + algo, gpg_strerror (err)); + } + err = gcry_cipher_gettag (hde2, tag2, 16); + if (err) + { + fail ("cipher_ocb, gcry_cipher_gettag failed (checksum2, algo %d): %s\n", + algo, gpg_strerror (err)); + } + if (memcmp (tag, tag2, 16)) + { + mismatch (tag, 16, tag2, 16); + fail ("cipher-ocb, encrypt tag mismatch (checksum, algo %d)\n", algo); + } + + gcry_cipher_close (hde); + gcry_cipher_close (hde2); + +out_free: + xfree(inbuf); + xfree(outbuf); +} + + static void check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect) { @@ -4863,6 +5091,8 @@ check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect) { check_ocb_cipher_largebuf_split(algo, keylen, tagexpect, split); } + + check_ocb_cipher_checksum(algo, keylen); } @@ -5108,35 +5338,25 @@ check_ocb_cipher (void) /* Check large buffer encryption/decryption. */ check_ocb_cipher_largebuf(GCRY_CIPHER_AES, 16, - "\xf5\xf3\x12\x7d\x58\x2d\x96\xe8" - "\x33\xfd\x7a\x4f\x42\x60\x5d\x20"); + "\xc1\x5b\xf1\x80\xa4\xd5\xea\xfd\xae\x17\xa6\xcd\x6b\x10\xa8\xea"); check_ocb_cipher_largebuf(GCRY_CIPHER_AES256, 32, - "\xfa\x26\xa5\xbf\xf6\x7d\x3a\x8d" - "\xfe\x96\x67\xc9\xc8\x41\x03\x51"); + "\x2b\xb7\x25\x6b\x77\xc7\xfb\x21\x5c\xc9\x6c\x36\x17\x1a\x1a\xd5"); check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA128, 16, - "\x28\x23\x38\x45\x2b\xfd\x42\x45" - "\x43\x64\x7e\x67\x7f\xf4\x8b\xcd"); + "\xe0\xae\x3f\x29\x3a\xee\xd8\xe3\xf2\x20\xc1\xa2\xd8\x72\x12\xd9"); check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA192, 24, - "\xee\xca\xe5\x39\x27\x2d\x33\xe7" - "\x79\x74\xb0\x1d\x37\x12\xd5\x6c"); + "\xd7\x98\x71\xcf\x19\x5c\xa3\x3d\x6c\xfc\xc9\xbe\x9f\x13\x6b\xbd"); check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA256, 32, - "\x39\x39\xd0\x2d\x05\x68\x74\xee" - "\x18\x6b\xea\x3d\x0b\xd3\x58\xae"); + "\x03\xf6\xec\x1a\x0e\xae\x66\x24\x2b\xba\x26\x0f\xb3\xb3\x1f\xb9"); 
check_ocb_cipher_largebuf(GCRY_CIPHER_TWOFISH, 16, - "\x63\xe3\x0e\xb9\x11\x6f\x14\xba" - "\x79\xe4\xa7\x9e\xad\x3c\x02\x0c"); + "\x1c\xf9\xc7\xfc\x3a\x32\xac\xc7\x5e\x0a\xc2\x5c\x90\xd6\xf6\xf9"); check_ocb_cipher_largebuf(GCRY_CIPHER_TWOFISH, 32, - "\xf6\xd4\xfe\x4e\x50\x85\x13\x59" - "\x69\x0e\x4c\x67\x3e\xdd\x47\x90"); + "\x53\x02\xc8\x0d\x4e\x9a\x44\x9e\x43\xd4\xaa\x06\x30\x93\xcc\x16"); check_ocb_cipher_largebuf(GCRY_CIPHER_SERPENT128, 16, - "\x3c\xfb\x66\x14\x3c\xc8\x6c\x67" - "\x26\xb8\x23\xeb\xaf\x43\x98\x69"); + "\xd3\x64\xac\x40\x48\x88\x77\xe2\x41\x26\x4c\xde\x21\x29\x21\x8d"); check_ocb_cipher_largebuf(GCRY_CIPHER_SERPENT192, 24, - "\x5e\x62\x27\xc5\x32\xc3\x1d\xe6" - "\x2e\x65\xe7\xd6\xfb\x05\xd7\xb2"); + "\x99\xeb\x35\xb0\x62\x4e\x7b\xf1\x5e\x9f\xed\x32\x78\x90\x0b\xd0"); check_ocb_cipher_largebuf(GCRY_CIPHER_SERPENT256, 32, - "\xe7\x8b\xe6\xd4\x2f\x7a\x36\x4c" - "\xba\xee\x20\xe2\x68\xf4\xcb\xcc"); + "\x71\x66\x2f\x68\xbf\xdd\xcc\xb1\xbf\x81\x56\x5f\x01\x73\xeb\x44"); /* Check that the AAD data is correctly buffered. */ check_ocb_cipher_splitaad (); From yann.garcia at fscom.fr Mon Nov 12 13:56:11 2018 From: yann.garcia at fscom.fr (Yann Garcia) Date: Mon, 12 Nov 2018 13:56:11 +0100 Subject: Libgcrypt: How to implement ECIES encryption (ECC + AES) In-Reply-To: <9348472.bpM0KbGFcq@tauon.chronox.de> References: <9348472.bpM0KbGFcq@tauon.chronox.de> Message-ID: Hello Stephen, Thanks a lot for your response. Does it mean that RFC 2898. is not supported? Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)6 68 94 57 76 Email: *yann.garcia at fscom.fr * Skype: yann.garcia Google+: garcia.yann at gmail.com On Mon, 12 Nov 2018 at 13:47, Stephan Mueller wrote: > Am Montag, 12. 
November 2018, 13:24:01 CET schrieb Yann Garcia: > > Hi Yann, > > > Dear All, > > > > I want to implement ECIES encryption. To do it, I have to 1) generate > ECDH > > secret key (the key agreement) and then 2) derive it using Key Derivation > > Function 2 (KDF2). > > > > 1) For the key agreement, I need to generate a random number. > > You need to have the local private ECDH key and the remote public ECDH > key. A > plain random number does not work as you need to send the local public > ECDH > key to your recipient as otherwise he cannot decrypt it. > > > Can you > > indicate me the best way to generate this random number because in the > > help, clause 9.2 Retrieving random numbers, I found 3 different methods > and > > I don't know which one is the best for my purpose? > > > > 2) Is it correct to use the function gcry_kdf_derive() with > GCRY_KDF_PBKDF2 > > flag to do the Key Derivation Function 2? > > Nope, PBKDF is not allowed. Key-based KDFs like SP800-108 or RFC5967 are > ok. I > am not sure libgcrypt offers a native implementation of those. > > > > 3) Does the libgcrypt provides samples for ECIES encryption? > > > > Many thanks in advance for your help, > > > > Best regards, > > Yann Garcia > > Senior Software Engineer > > Microsoft MCAD.net Certified > > ************************************** > > FSCOM SARL > > Le Montespan B2 > > 6, > > < > https://maps.google.com/?q=6,%C2%A0+Avenue+des+Alpes&entry=gmail&source=g> > > Avenue des Alpes > > < > https://maps.google.com/?q=6,%C2%A0+Avenue+des+Alpes&entry=gmail&source=g> > > F-06600 Antibes, FRANCE > > ************************************************ > > Tel: +33 (0)4 92 94 49 08 > > Mobile: +33 (0)7 61 00 77 05 > > Email: *yann.garcia at fscom.fr* > > Yann.Garcia_EXT at etsi.org > > Skype: yann.garcia > > Google+: garcia.yann at gmail.com > > > > Ciao > Stephan > > > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From smueller at chronox.de Mon Nov 12 14:02:30 2018 From: smueller at chronox.de (Stephan Mueller) Date: Mon, 12 Nov 2018 14:02:30 +0100 Subject: Libgcrypt: How to implement ECIES encryption (ECC + AES) In-Reply-To: References: <9348472.bpM0KbGFcq@tauon.chronox.de> Message-ID: <3727559.FsvCGvR84V@tauon.chronox.de> Am Montag, 12. November 2018, 13:56:11 CET schrieb Yann Garcia: Hi Yann, > Hello Stephen, > > Thanks a lot for your response. > Does it mean that RFC 2898. is not supported? Sure, PKCS5 (aka PBKDF2) is supported by libgcrypt. But it is *password*-based key derivation. ECIES however implies that you need to generate a key from the ECDH shared secret. I.e. you need a *key*-based key derivation function (SP800-108, RFC5869 etc.). Ciao Stephan From smueller at chronox.de Mon Nov 12 13:47:48 2018 From: smueller at chronox.de (Stephan Mueller) Date: Mon, 12 Nov 2018 13:47:48 +0100 Subject: Libgcrypt: How to implement ECIES encryption (ECC + AES) In-Reply-To: References: Message-ID: <9348472.bpM0KbGFcq@tauon.chronox.de> Am Montag, 12. November 2018, 13:24:01 CET schrieb Yann Garcia: Hi Yann, > Dear All, > > I want to implement ECIES encryption. To do it, I have to 1) generate ECDH > secret key (the key agreement) and then 2) derive it using Key Derivation > Function 2 (KDF2). > > 1) For the key agreement, I need to generate a random number. You need to have the local private ECDH key and the remote public ECDH key. A plain random number does not work as you need to send the local public ECDH key to your recipient as otherwise he cannot decrypt it. > Can you > indicate me the best way to generate this random number because in the > help, clause 9.2 Retrieving random numbers, I found 3 different methods and > I don't know which one is the best for my purpose? > > 2) Is it correct to use the function gcry_kdf_derive() with GCRY_KDF_PBKDF2 > flag to do the Key Derivation Function 2? Nope, PBKDF is not allowed. Key-based KDFs like SP800-108 or RFC5967 are ok. 
I am not sure libgcrypt offers a native implementation of those. > > 3) Does the libgcrypt provides samples for ECIES encryption? > > Many thanks in advance for your help, > > Best regards, > Yann Garcia > Senior Software Engineer > Microsoft MCAD.net Certified > ************************************** > FSCOM SARL > Le Montespan B2 > 6, > > Avenue des Alpes > > F-06600 Antibes, FRANCE > ************************************************ > Tel: +33 (0)4 92 94 49 08 > Mobile: +33 (0)7 61 00 77 05 > Email: *yann.garcia at fscom.fr* > Yann.Garcia_EXT at etsi.org > Skype: yann.garcia > Google+: garcia.yann at gmail.com Ciao Stephan From cvs at cvs.gnupg.org Tue Nov 13 03:20:37 2018 From: cvs at cvs.gnupg.org (by NIIBE Yutaka) Date: Tue, 13 Nov 2018 03:20:37 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-127-g8522453 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 852245390ef7fd8ca9e36010886a4cf42cf710bf (commit) via bea193446351c24b10a4342466978d57bd53f599 (commit) from 3f76319803a4abcd33fa29a0ac39f8ed9d646226 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 852245390ef7fd8ca9e36010886a4cf42cf710bf Author: NIIBE Yutaka Date: Tue Nov 13 10:30:39 2018 +0900 libgcrypt.m4: Prefer gpgrt-config to SYSROOT support. * libgcrypt.m4: Move SYSROOT support after check of GPGRT_CONFIG. 
Signed-off-by: NIIBE Yutaka diff --git a/src/libgcrypt.m4 b/src/libgcrypt.m4 index 40ea01c..37dfbea 100644 --- a/src/libgcrypt.m4 +++ b/src/libgcrypt.m4 @@ -9,7 +9,7 @@ # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # -# Last-changed: 2018-11-02 +# Last-changed: 2018-11-13 dnl AM_PATH_LIBGCRYPT([MINIMUM-VERSION, @@ -36,19 +36,6 @@ AC_DEFUN([AM_PATH_LIBGCRYPT], if test x"${LIBGCRYPT_CONFIG}" = x ; then if test x"${libgcrypt_config_prefix}" != x ; then LIBGCRYPT_CONFIG="${libgcrypt_config_prefix}/bin/libgcrypt-config" - else - case "${SYSROOT}" in - /*) - if test -x "${SYSROOT}/bin/libgcrypt-config" ; then - LIBGCRYPT_CONFIG="${SYSROOT}/bin/libgcrypt-config" - fi - ;; - '') - ;; - *) - AC_MSG_WARN([Ignoring \$SYSROOT as it is not an absolute path.]) - ;; - esac fi fi @@ -61,6 +48,20 @@ AC_DEFUN([AM_PATH_LIBGCRYPT], fi fi if test -z "$use_gpgrt_config"; then + if test x"${LIBGCRYPT_CONFIG}" = x ; then + case "${SYSROOT}" in + /*) + if test -x "${SYSROOT}/bin/libgcrypt-config" ; then + LIBGCRYPT_CONFIG="${SYSROOT}/bin/libgcrypt-config" + fi + ;; + '') + ;; + *) + AC_MSG_WARN([Ignoring \$SYSROOT as it is not an absolute path.]) + ;; + esac + fi AC_PATH_PROG(LIBGCRYPT_CONFIG, libgcrypt-config, no) fi commit bea193446351c24b10a4342466978d57bd53f599 Author: NIIBE Yutaka Date: Tue Nov 13 09:36:37 2018 +0900 build: Update autogen.rc. * autogen.rc: Remove obsolete --with-gpg-error-prefix option. 
Signed-off-by: NIIBE Yutaka diff --git a/autogen.rc b/autogen.rc index 09a9b9c..646f659 100644 --- a/autogen.rc +++ b/autogen.rc @@ -2,15 +2,11 @@ case "$myhost" in w32) - configure_opts=" - --with-gpg-error-prefix=@SYSROOT@ - " + configure_opts="" ;; amd64) - configure_opts=" - --with-gpg-error-prefix=@SYSROOT@ - " + configure_opts="" ;; esac ----------------------------------------------------------------------- Summary of changes: autogen.rc | 8 ++------ src/libgcrypt.m4 | 29 +++++++++++++++-------------- 2 files changed, 17 insertions(+), 20 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From garcia.yann at gmail.com Tue Nov 13 09:16:58 2018 From: garcia.yann at gmail.com (Yann Garcia) Date: Tue, 13 Nov 2018 09:16:58 +0100 Subject: How to use nist_generate_key Message-ID: Hello, I'm looking for a sample indicating how to use the function nist_generate_key() using ECDH? Can someone provide me some hints, I tried to google nist_generate_key but I didn't find any sample? Many thanks in advance, Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)7 61 00 77 05 Email: *yann.garcia at fscom.fr* Yann.Garcia_EXT at etsi.org Skype: yann.garcia Google+: garcia.yann at gmail.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From cvs at cvs.gnupg.org Wed Nov 14 14:15:45 2018 From: cvs at cvs.gnupg.org (by Werner Koch) Date: Wed, 14 Nov 2018 14:15:45 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-128-gaa686df Message-ID: This is an automated email from the git hooks/post-receive script. 
It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via aa686dfc9b563ff79c01d2f8560b88f69c42ecba (commit) from 852245390ef7fd8ca9e36010886a4cf42cf710bf (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit aa686dfc9b563ff79c01d2f8560b88f69c42ecba Author: Werner Koch Date: Wed Nov 14 14:14:23 2018 +0100 random: Initialize variable as requested by valgrind random/jitterentropy-base.c: Init. -- The variable ec does not need initialization for proper functioning of the analyzer code. However, valgrind complains about the uninitialized variable. Thus, initialize it. Original-repo: https://github.com/smuellerDD/jitterentropy-library.git Original-commit: 9048af7f06fc1488904f54852e0a2f8da45a4745 Original-Author:: Stephan Mueller Original-Date: Sun, 15 Jul 2018 19:14:02 +0200 Reported-by: Steve Grubb Signed-off-by: Werner Koch diff --git a/random/jitterentropy-base.c b/random/jitterentropy-base.c index dc907b2..32fdea4 100644 --- a/random/jitterentropy-base.c +++ b/random/jitterentropy-base.c @@ -642,6 +642,8 @@ int jent_entropy_init(void) int count_stuck = 0; struct rand_data ec; + memset(&ec, 0, sizeof(ec)); + /* We could perform statistical tests here, but the problem is * that we only have a few loop counts to do testing. 
These * loop counts may show some slight skew and we produce ----------------------------------------------------------------------- Summary of changes: random/jitterentropy-base.c | 2 ++ 1 file changed, 2 insertions(+) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From wk at gnupg.org Wed Nov 14 18:49:48 2018 From: wk at gnupg.org (Werner Koch) Date: Wed, 14 Nov 2018 18:49:48 +0100 Subject: How to use nist_generate_key In-Reply-To: (Yann Garcia's message of "Tue, 13 Nov 2018 09:16:58 +0100") References: Message-ID: <877ehflfb7.fsf@wheatstone.g10code.de> On Tue, 13 Nov 2018 09:16, garcia.yann at gmail.com said: > I'm looking for sample indicating of to use the function > nist_generate_key() using ECDH? That is an internal function of libgcrypt and you can't use it from your application. You need to use Libgcrypt's API to do that. Check the manual for the gcry_pk_genkey function. To create an ECC keypair for the NIST P-521 curve, you need to use these parameters "(genkey(ecc(curve nistp521)(flags nocomp)))" convert them to an s-expression object and pass it to gcry_pk_genkey. There are several examples for it in libgcrypt's source code (under tests/) and you can also look into GnuPG (agent/genkey.c:agent_genkey is the core which is called from g10/keygen.c:gen_ecc). Salam-Shalom, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. -------------- next part -------------- A non-text attachment was scrubbed... 
Name: not available Type: application/pgp-signature Size: 227 bytes Desc: not available URL: From yann.garcia at fscom.fr Wed Nov 14 18:57:44 2018 From: yann.garcia at fscom.fr (Yann Garcia) Date: Wed, 14 Nov 2018 18:57:44 +0100 Subject: How to use nist_generate_key In-Reply-To: <877ehflfb7.fsf@wheatstone.g10code.de> References: <877ehflfb7.fsf@wheatstone.g10code.de> Message-ID: Hello Werner, Thank you for your answer. Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)6 68 94 57 76 Email: *yann.garcia at fscom.fr * Skype: yann.garcia Google+: garcia.yann at gmail.com On Wed, 14 Nov 2018 at 18:50, Werner Koch wrote: > On Tue, 13 Nov 2018 09:16, garcia.yann at gmail.com said: > > > I'm looking for sample indicating of to use the function > > nist_generate_key() using ECDH? > > That is an internal function of libgcrypt and you can't use it from your > application. You need to use Libgcrypt's API to do that. Check the > manual for the gcry_pk_genkey function. To create an ECC keypair for > the NIST P-521 curve, you need to use these parameters > > "(genkey(ecc(curve nistp521)(flags nocomp)))" > > convert them to an s-expression object and pass it to gcry_pk_genkey. > There are several examples for it in libgcrypt's soruce code (under > tests/) and you can also look into GnuPG (agent/genkey.c:agent_genkey is > the core which is called from g10/keygen.c:gen_ecc). > > > Salam-Shalom, > > Werner > > -- > Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From ametzler at bebt.de Sun Nov 18 16:11:50 2018 From: ametzler at bebt.de (Andreas Metzler) Date: Sun, 18 Nov 2018 16:11:50 +0100 Subject: documentation: Avoid too tight binary dependency Message-ID: <20181118151150.GC31413@argenau.bebt.de> Hello, find attached a trivial patch against GIT master to stop suggesting gcry_check_version (GCRYPT_VERSION) since this will fail incorrectly. e.g. a binary built against 1.8.4 does run perfectly well against libgcrypt 1.8.3. Instead let's suggest doing what gnupg2 does, specifying the version that is needed to build successfully. Thanks for considering, cu Andreas -- `What a good friend you are to him, Dr. Maturin. His other friends are so grateful to you.' `I sew his ears on from time to time, sure' -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-doc-Fix-library-initialization-examples.patch Type: text/x-diff Size: 1836 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 833 bytes Desc: not available URL: From cvs at cvs.gnupg.org Mon Nov 19 09:03:32 2018 From: cvs at cvs.gnupg.org (by Andreas Metzler) Date: Mon, 19 Nov 2018 09:03:32 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-129-gaf0bbdb Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via af0bbdb9019e0b4a72e87e8b1b4a55506d349834 (commit) from aa686dfc9b563ff79c01d2f8560b88f69c42ecba (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. 
- Log ----------------------------------------------------------------- commit af0bbdb9019e0b4a72e87e8b1b4a55506d349834 Author: Andreas Metzler Date: Sun Nov 18 16:01:21 2018 +0100 doc: Fix library initialization examples Signed-off-by: Andreas Metzler diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi index 4f4f973..5f20a54 100644 --- a/doc/gcrypt.texi +++ b/doc/gcrypt.texi @@ -382,10 +382,12 @@ memory is not a problem, you should initialize Libgcrypt this way: @example /* Version check should be the very first call because it - makes sure that important subsystems are initialized. */ - if (!gcry_check_version (GCRYPT_VERSION)) + makes sure that important subsystems are initialized. + #define NEED_LIBGCRYPT_VERSION to the minimum required version. */ + if (!gcry_check_version (NEED_LIBGCRYPT_VERSION)) @{ - fputs ("libgcrypt version mismatch\n", stderr); + fprintf (stderr, "libgcrypt is too old (need %s, have %s)\n", + NEED_LIBGCRYPT_VERSION, gcry_check_version (NULL)); exit (2); @} @@ -405,10 +407,12 @@ and freed memory, you need to initialize Libgcrypt this way: @example /* Version check should be the very first call because it - makes sure that important subsystems are initialized. */ - if (!gcry_check_version (GCRYPT_VERSION)) + makes sure that important subsystems are initialized. + #define NEED_LIBGCRYPT_VERSION to the minimum required version. 
*/ + if (!gcry_check_version (NEED_LIBGCRYPT_VERSION)) @{ - fputs ("libgcrypt version mismatch\n", stderr); + fprintf (stderr, "libgcrypt is too old (need %s, have %s)\n", + NEED_LIBGCRYPT_VERSION, gcry_check_version (NULL)); exit (2); @} ----------------------------------------------------------------------- Summary of changes: doc/gcrypt.texi | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From wk at gnupg.org Mon Nov 19 09:04:41 2018 From: wk at gnupg.org (Werner Koch) Date: Mon, 19 Nov 2018 09:04:41 +0100 Subject: documentation: Avoid too tight binary dependency In-Reply-To: <20181118151150.GC31413@argenau.bebt.de> (Andreas Metzler's message of "Sun, 18 Nov 2018 16:11:50 +0100") References: <20181118151150.GC31413@argenau.bebt.de> Message-ID: <87bm6lcx2e.fsf@wheatstone.g10code.de> On Sun, 18 Nov 2018 16:11, ametzler at bebt.de said: > find attached a trivial patch against GIT master to stop suggesting > gcry_check_version (GCRYPT_VERSION) Applied. Thanks. Salam-Shalom, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 227 bytes Desc: not available URL: From jussi.kivilinna at iki.fi Tue Nov 20 19:51:02 2018 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Tue, 20 Nov 2018 20:51:02 +0200 Subject: [PATCH] Use explicit_bzero for wipememory Message-ID: <154273986264.23928.6596778745751819274.stgit@localhost.localdomain> * configure.ac (AC_CHECK_FUNCS): Check for 'explicit_bzero'. * src/g10lib.h (wipememory2): Use _gcry_fast_wipememory if _SET is zero. (_gcry_fast_wipememory): New. (_gcry_wipememory2): Rename to... (_gcry_fast_wipememory2): ...this. 
* src/misc.c (_gcry_wipememory): New. (_gcry_wipememory2): Rename to... (_gcry_fast_wipememory2): ...this. (_gcry_fast_wipememory2) [HAVE_EXPLICIT_BZERO]: Use explicit_bzero if SET is zero. (_gcry_burn_stack): Use _gcry_fast_wipememory. -- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/configure.ac b/configure.ac index 9803d518b..5843884c6 100644 --- a/configure.ac +++ b/configure.ac @@ -1772,6 +1772,7 @@ AC_CHECK_FUNCS(strtoul memmove stricmp atexit raise) AC_CHECK_FUNCS(strerror rand mmap getpagesize sysconf waitpid wait4) AC_CHECK_FUNCS(gettimeofday getrusage gethrtime clock_gettime syslog) AC_CHECK_FUNCS(syscall fcntl ftruncate flockfile) +AC_CHECK_FUNCS(explicit_bzero) GNUPG_CHECK_MLOCK diff --git a/src/g10lib.h b/src/g10lib.h index 9b2147812..694c2d83e 100644 --- a/src/g10lib.h +++ b/src/g10lib.h @@ -334,15 +334,16 @@ void __gcry_burn_stack (unsigned int bytes); do { __gcry_burn_stack (bytes); \ __gcry_burn_stack_dummy (); } while(0) - /* To avoid that a compiler optimizes certain memset calls away, these macros may be used instead. For small constant length buffers, memory wiping is inlined. For non-constant or large length buffers, - memory is wiped with memset through _gcry_wipememory. */ -void _gcry_wipememory2(void *ptr, int set, size_t len); + memory is wiped with memset through _gcry_fast_wipememory. 
*/ #define wipememory2(_ptr,_set,_len) do { \ if (!CONSTANT_P(_len) || _len > 64) { \ - _gcry_wipememory2((void *)_ptr, _set, _len); \ + if (CONSTANT_P(_set) && (_set) == 0) \ + _gcry_fast_wipememory((void *)_ptr, _len); \ + else \ + _gcry_fast_wipememory2((void *)_ptr, _set, _len); \ } else {\ volatile char *_vptr = (volatile char *)(_ptr); \ size_t _vlen = (_len); \ @@ -353,6 +354,9 @@ void _gcry_wipememory2(void *ptr, int set, size_t len); } while(0) #define wipememory(_ptr,_len) wipememory2(_ptr,0,_len) +void _gcry_fast_wipememory(void *ptr, size_t len); +void _gcry_fast_wipememory2(void *ptr, int set, size_t len); + #if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \ defined(HAVE_GCC_ATTRIBUTE_ALIGNED) && \ defined(HAVE_GCC_ATTRIBUTE_MAY_ALIAS) diff --git a/src/misc.c b/src/misc.c index 420ce74db..bb39e1c2f 100644 --- a/src/misc.c +++ b/src/misc.c @@ -32,6 +32,8 @@ static int verbosity_level = 0; +/* Prevent compiler from optimizing away the call to memset by accessing + memset through volatile pointer. */ static void *(*volatile memset_ptr)(void *, int, size_t) = (void *)memset; static void (*fatal_error_handler)(void*,int, const char*) = NULL; @@ -500,8 +502,37 @@ _gcry_strtokenize (const char *string, const char *delim) void -_gcry_wipememory2 (void *ptr, int set, size_t len) +_gcry_fast_wipememory (void *ptr, size_t len) { + /* Note: This function is called from wipememory/wipememory2 only if LEN + is large or unknown at compile time. New wipe function alternatives + need to be checked before adding to this function. New implementations + need to be faster than wipememory/wipememory2 macros in 'misc.h'. 
+ + Following implementations were found to have suboptimal performance: + + - [_WIN32/mingw32] SecureZeroMemory; Inline function, equivalent to + volatile byte buffer set: while(buflen--) (volatile char *)(buf++)=set; + */ +#ifdef HAVE_EXPLICIT_BZERO + explicit_bzero (ptr, len); +#else + memset_ptr (ptr, 0, len); +#endif +} + + +void +_gcry_fast_wipememory2 (void *ptr, int set, size_t len) +{ +#ifdef HAVE_EXPLICIT_BZERO + if (set == 0) + { + explicit_bzero (ptr, len); + return; + } +#endif + memset_ptr (ptr, set, len); } @@ -514,11 +545,11 @@ __gcry_burn_stack (unsigned int bytes) unsigned int buflen = ((!bytes + bytes) + 63) & ~63; char buf[buflen]; - memset_ptr (buf, 0, buflen); + _gcry_fast_wipememory (buf, buflen); #else volatile char buf[64]; - wipememory (buf, sizeof buf); + _gcry_fast_wipememory (buf, sizeof buf); if (bytes > sizeof buf) _gcry_burn_stack (bytes - sizeof buf); From cvs at cvs.gnupg.org Tue Nov 20 20:17:52 2018 From: cvs at cvs.gnupg.org (by Jussi Kivilinna) Date: Tue, 20 Nov 2018 20:17:52 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-131-g9d9c4fd Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 9d9c4fd18b445ff414d11678285d54af3afdb222 (commit) via b42de67f34871a2520cfe370af513f2aab6e4f75 (commit) from af0bbdb9019e0b4a72e87e8b1b4a55506d349834 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 9d9c4fd18b445ff414d11678285d54af3afdb222 Author: Jussi Kivilinna Date: Tue Nov 20 21:16:08 2018 +0200 Add clang target pragma for mixed C/assembly x86-64 implementations * cipher/cipher-gcm-intel-pclmul.c: Add target 'no-sse' attribute pragma for clang. 
* cipher/crc-intel-pclmul.c: Ditto. * cipher/rijndael-aesni.c: Ditto. * cipher/rijndael-ssse3-amd64.c: Ditto. * cipher/sha1-intel-shaext.c: Ditto. * cipher/sha256-intel-shaext.c: Ditto. -- Signed-off-by: Jussi Kivilinna diff --git a/cipher/cipher-gcm-intel-pclmul.c b/cipher/cipher-gcm-intel-pclmul.c index 0f26277..60ae7aa 100644 --- a/cipher/cipher-gcm-intel-pclmul.c +++ b/cipher/cipher-gcm-intel-pclmul.c @@ -37,6 +37,9 @@ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif /* @@ -474,4 +477,8 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf, return 0; } +#if __clang__ +# pragma clang attribute pop +#endif + #endif /* GCM_USE_INTEL_PCLMUL */ diff --git a/cipher/crc-intel-pclmul.c b/cipher/crc-intel-pclmul.c index 8ff08ec..482b260 100644 --- a/cipher/crc-intel-pclmul.c +++ b/cipher/crc-intel-pclmul.c @@ -39,6 +39,9 @@ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif #define ALIGNED_16 __attribute__ ((aligned (16))) @@ -922,4 +925,8 @@ _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen) #endif } +#if __clang__ +# pragma clang attribute pop +#endif + #endif /* USE_INTEL_PCLMUL */ diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c index c1ebab0..483387c 100644 --- a/cipher/rijndael-aesni.c +++ b/cipher/rijndael-aesni.c @@ -39,6 +39,9 @@ /* Prevent compiler from issuing SSE instructions between asm blocks. 
*/ # pragma GCC target("no-sse") #endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif #define ALWAYS_INLINE inline __attribute__((always_inline)) @@ -3514,4 +3517,8 @@ _gcry_aes_aesni_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak, _gcry_aes_aesni_xts_dec(ctx, tweak, outbuf, inbuf, nblocks); } +#if __clang__ +# pragma clang attribute pop +#endif + #endif /* USE_AESNI */ diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c index fa481bb..0c1ae6e 100644 --- a/cipher/rijndael-ssse3-amd64.c +++ b/cipher/rijndael-ssse3-amd64.c @@ -55,6 +55,9 @@ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif /* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l @@ -726,4 +729,8 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, return 0; } +#if __clang__ +# pragma clang attribute pop +#endif + #endif /* USE_SSSE3 */ diff --git a/cipher/sha1-intel-shaext.c b/cipher/sha1-intel-shaext.c index 5a2349e..d7e3d4f 100644 --- a/cipher/sha1-intel-shaext.c +++ b/cipher/sha1-intel-shaext.c @@ -29,6 +29,9 @@ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif /* Two macros to be called prior and after the use of SHA-EXT instructions. 
There should be no external function calls between @@ -278,4 +281,8 @@ _gcry_sha1_transform_intel_shaext(void *state, const unsigned char *data, return 0; } +#if __clang__ +# pragma clang attribute pop +#endif + #endif /* HAVE_GCC_INLINE_ASM_SHA_EXT */ diff --git a/cipher/sha256-intel-shaext.c b/cipher/sha256-intel-shaext.c index 0c107bb..2eda42d 100644 --- a/cipher/sha256-intel-shaext.c +++ b/cipher/sha256-intel-shaext.c @@ -29,6 +29,9 @@ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif /* Two macros to be called prior and after the use of SHA-EXT instructions. There should be no external function calls between @@ -349,4 +352,8 @@ _gcry_sha256_transform_intel_shaext(u32 state[8], const unsigned char *data, return 0; } +#if __clang__ +# pragma clang attribute pop +#endif + #endif /* HAVE_GCC_INLINE_ASM_SHA_EXT */ commit b42de67f34871a2520cfe370af513f2aab6e4f75 Author: Jussi Kivilinna Date: Tue Nov 20 21:16:08 2018 +0200 Optimizations for AES-NI OCB * cipher/cipher-internal.h (gcry_cipher_handle): New pre-computed OCB values L0L1 and L0L1L0; Swap dimensions for OCB L table. * cipher/cipher-ocb.c (_gcry_cipher_ocb_set_nonce): Setup L0L1 and L0L1L0 values. (ocb_crypt): Process input in 24KiB chunks for better cache locality for checksumming. * cipher/rijndael-aesni.c (ALWAYS_INLINE): New macro for always inlining functions, change all functions with 'inline' to use ALWAYS_INLINE. (NO_INLINE): New macro. (aesni_prepare_2_6_variable, aesni_prepare_7_15_variable): Rename to... (aesni_prepare_2_7_variable, aesni_prepare_8_15_variable): ...these and adjust accordingly (xmm7 moved from *_7_15 to *_2_7). (aesni_prepare_2_6, aesni_prepare_7_15): Rename to... (aesni_prepare_2_7, aesni_prepare_8_15): ...these and adjust accordingly. (aesni_cleanup_2_6, aesni_cleanup_7_15): Rename to... 
(aesni_cleanup_2_7, aesni_cleanup_8_15): ...these and adjust accordingly. (aesni_ocb_checksum): New. (aesni_ocb_enc, aesni_ocb_dec): Calculate OCB offsets in parallel with help of pre-computed offsets L0+L1 ja L0+L1+L0; Do checksum calculation as separate pass instead of inline; Use NO_INLINE. (_gcry_aes_aesni_ocb_auth): Calculate OCB offsets in parallel with help of pre-computed offsets L0+L1 ja L0+L1+L0. * cipher/rijndael-internal.h (RIJNDAEL_context_s) [USE_AESNI]: Add 'use_avx2' and 'use_avx'. * cipher/rijndael.c (do_setkey) [USE_AESNI]: Set 'use_avx2' if Intel AVX2 HW feature is available and 'use_avx' if Intel AVX HW feature is available. * tests/basic.c (do_check_ocb_cipher): New test vector; increase size of temporary buffers for new test vector. (check_ocb_cipher_largebuf_split): Make test plaintext non-uniform for better checksum testing. (check_ocb_cipher_checksum): New. (check_ocb_cipher_largebuf): Call check_ocb_cipher_checksum. (check_ocb_cipher): New expected tags for check_ocb_cipher_largebuf test runs. -- Benchmark on Haswell i7-4970k @ 4.0Ghz: Before: AES | nanosecs/byte mebibytes/sec cycles/byte OCB enc | 0.175 ns/B 5436 MiB/s 0.702 c/B OCB dec | 0.184 ns/B 5184 MiB/s 0.736 c/B OCB auth | 0.156 ns/B 6097 MiB/s 0.626 c/B After (enc +2% faster, dec +7% faster): OCB enc | 0.172 ns/B 5547 MiB/s 0.688 c/B OCB dec | 0.171 ns/B 5582 MiB/s 0.683 c/B OCB auth | 0.156 ns/B 6097 MiB/s 0.626 c/B Signed-off-by: Jussi Kivilinna diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h index f93363b..8988696 100644 --- a/cipher/cipher-internal.h +++ b/cipher/cipher-internal.h @@ -319,7 +319,9 @@ struct gcry_cipher_handle /* Helper variables and pre-computed table of L values. 
*/ unsigned char L_star[OCB_BLOCK_LEN]; unsigned char L_dollar[OCB_BLOCK_LEN]; - unsigned char L[OCB_BLOCK_LEN][OCB_L_TABLE_SIZE]; + unsigned char L0L1[OCB_BLOCK_LEN]; + unsigned char L0L1L0[OCB_BLOCK_LEN]; + unsigned char L[OCB_L_TABLE_SIZE][OCB_BLOCK_LEN]; /* The tag is valid if marks.tag has been set. */ unsigned char tag[OCB_BLOCK_LEN]; diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c index f71520a..58f7be7 100644 --- a/cipher/cipher-ocb.c +++ b/cipher/cipher-ocb.c @@ -170,6 +170,11 @@ _gcry_cipher_ocb_set_nonce (gcry_cipher_hd_t c, const unsigned char *nonce, double_block_cpy (c->u_mode.ocb.L[0], c->u_mode.ocb.L_dollar); for (i = 1; i < OCB_L_TABLE_SIZE; i++) double_block_cpy (c->u_mode.ocb.L[i], c->u_mode.ocb.L[i-1]); + /* Precalculated offsets L0+L1, L0+L1+L0 */ + cipher_block_xor (c->u_mode.ocb.L0L1, + c->u_mode.ocb.L[0], c->u_mode.ocb.L[1], OCB_BLOCK_LEN); + cipher_block_xor (c->u_mode.ocb.L0L1L0, + c->u_mode.ocb.L[0], c->u_mode.ocb.L0L1, OCB_BLOCK_LEN); /* Prepare the nonce. */ memset (ktop, 0, (OCB_BLOCK_LEN - noncelen)); @@ -519,6 +524,12 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt, nblks = nblks < nmaxblks ? nblks : nmaxblks; + /* Since checksum xoring is done before/after encryption/decryption, + process input in 24KiB chunks to keep data loaded in L1 cache for + checksumming. */ + if (nblks > 24 * 1024 / OCB_BLOCK_LEN) + nblks = 24 * 1024 / OCB_BLOCK_LEN; + /* Use a bulk method if available. */ if (nblks && c->bulk.ocb_crypt) { diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c index d190c0a..c1ebab0 100644 --- a/cipher/rijndael-aesni.c +++ b/cipher/rijndael-aesni.c @@ -41,6 +41,10 @@ #endif +#define ALWAYS_INLINE inline __attribute__((always_inline)) +#define NO_INLINE __attribute__((noinline)) + + typedef struct u128_s { u32 a, b, c, d; @@ -49,7 +53,7 @@ typedef struct u128_s /* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l because of 'pragma target'. 
*/ -static inline const unsigned char * +static ALWAYS_INLINE const unsigned char * aes_ocb_get_l (gcry_cipher_hd_t c, u64 n) { unsigned long ntz; @@ -71,78 +75,78 @@ aes_ocb_get_l (gcry_cipher_hd_t c, u64 n) the key or the data. */ #ifdef __WIN64__ /* XMM6-XMM15 are callee-saved registers on WIN64. */ -# define aesni_prepare_2_6_variable char win64tmp[16] -# define aesni_prepare_7_15_variable char win64tmp7_15[16 * 9] +# define aesni_prepare_2_7_variable char win64tmp[16 * 2] +# define aesni_prepare_8_15_variable char win64tmp8_15[16 * 8] # define aesni_prepare() do { } while (0) -# define aesni_prepare_2_6() \ +# define aesni_prepare_2_7() \ do { asm volatile ("movdqu %%xmm6, %0\n\t" \ - : "=m" (*win64tmp) \ + "movdqu %%xmm7, %1\n\t" \ + : "=m" (*win64tmp), "=m" (*(win64tmp+16)) \ : \ : "memory"); \ } while (0) -# define aesni_prepare_7_15() \ - do { asm volatile ("movdqu %%xmm7, 0*16(%0)\n\t" \ - "movdqu %%xmm8, 1*16(%0)\n\t" \ - "movdqu %%xmm9, 2*16(%0)\n\t" \ - "movdqu %%xmm10, 3*16(%0)\n\t" \ - "movdqu %%xmm11, 4*16(%0)\n\t" \ - "movdqu %%xmm12, 5*16(%0)\n\t" \ - "movdqu %%xmm13, 6*16(%0)\n\t" \ - "movdqu %%xmm14, 7*16(%0)\n\t" \ - "movdqu %%xmm15, 8*16(%0)\n\t" \ +# define aesni_prepare_8_15() \ + do { asm volatile ("movdqu %%xmm8, 0*16(%0)\n\t" \ + "movdqu %%xmm9, 1*16(%0)\n\t" \ + "movdqu %%xmm10, 2*16(%0)\n\t" \ + "movdqu %%xmm11, 3*16(%0)\n\t" \ + "movdqu %%xmm12, 4*16(%0)\n\t" \ + "movdqu %%xmm13, 5*16(%0)\n\t" \ + "movdqu %%xmm14, 6*16(%0)\n\t" \ + "movdqu %%xmm15, 7*16(%0)\n\t" \ : \ - : "r" (win64tmp7_15) \ + : "r" (win64tmp8_15) \ : "memory"); \ } while (0) # define aesni_cleanup() \ do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ "pxor %%xmm1, %%xmm1\n" :: ); \ } while (0) -# define aesni_cleanup_2_6() \ +# define aesni_cleanup_2_7() \ do { asm volatile ("movdqu %0, %%xmm6\n\t" \ + "movdqu %1, %%xmm7\n\t" \ "pxor %%xmm2, %%xmm2\n" \ "pxor %%xmm3, %%xmm3\n" \ "pxor %%xmm4, %%xmm4\n" \ "pxor %%xmm5, %%xmm5\n" \ : \ - : "m" (*win64tmp) \ + : "m" 
(*win64tmp), "m" (*(win64tmp+16)) \ : "memory"); \ } while (0) -# define aesni_cleanup_7_15() \ - do { asm volatile ("movdqu 0*16(%0), %%xmm7\n\t" \ - "movdqu 1*16(%0), %%xmm8\n\t" \ - "movdqu 2*16(%0), %%xmm9\n\t" \ - "movdqu 3*16(%0), %%xmm10\n\t" \ - "movdqu 4*16(%0), %%xmm11\n\t" \ - "movdqu 5*16(%0), %%xmm12\n\t" \ - "movdqu 6*16(%0), %%xmm13\n\t" \ - "movdqu 7*16(%0), %%xmm14\n\t" \ - "movdqu 8*16(%0), %%xmm15\n\t" \ +# define aesni_cleanup_8_15() \ + do { asm volatile ("movdqu 0*16(%0), %%xmm8\n\t" \ + "movdqu 1*16(%0), %%xmm9\n\t" \ + "movdqu 2*16(%0), %%xmm10\n\t" \ + "movdqu 3*16(%0), %%xmm11\n\t" \ + "movdqu 4*16(%0), %%xmm12\n\t" \ + "movdqu 5*16(%0), %%xmm13\n\t" \ + "movdqu 6*16(%0), %%xmm14\n\t" \ + "movdqu 7*16(%0), %%xmm15\n\t" \ : \ - : "r" (win64tmp7_15) \ + : "r" (win64tmp8_15) \ : "memory"); \ } while (0) #else -# define aesni_prepare_2_6_variable +# define aesni_prepare_2_7_variable # define aesni_prepare() do { } while (0) -# define aesni_prepare_2_6() do { } while (0) +# define aesni_prepare_2_7() do { } while (0) # define aesni_cleanup() \ do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ "pxor %%xmm1, %%xmm1\n" :: ); \ } while (0) -# define aesni_cleanup_2_6() \ - do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \ +# define aesni_cleanup_2_7() \ + do { asm volatile ("pxor %%xmm7, %%xmm7\n\t" \ + "pxor %%xmm2, %%xmm2\n\t" \ "pxor %%xmm3, %%xmm3\n" \ "pxor %%xmm4, %%xmm4\n" \ "pxor %%xmm5, %%xmm5\n" \ "pxor %%xmm6, %%xmm6\n":: ); \ } while (0) # ifdef __x86_64__ -# define aesni_prepare_7_15_variable -# define aesni_prepare_7_15() do { } while (0) -# define aesni_cleanup_7_15() \ - do { asm volatile ("pxor %%xmm7, %%xmm7\n\t" \ - "pxor %%xmm8, %%xmm8\n" \ +# define aesni_prepare_8_15_variable +# define aesni_prepare_8_15() do { } while (0) +# define aesni_cleanup_8_15() \ + do { asm volatile ("pxor %%xmm8, %%xmm8\n" \ "pxor %%xmm9, %%xmm9\n" \ "pxor %%xmm10, %%xmm10\n" \ "pxor %%xmm11, %%xmm11\n" \ @@ -157,10 +161,10 @@ aes_ocb_get_l 
(gcry_cipher_hd_t c, u64 n) void _gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key) { - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare(); - aesni_prepare_2_6(); + aesni_prepare_2_7(); if (ctx->rounds < 12) { @@ -383,12 +387,12 @@ _gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key) } aesni_cleanup(); - aesni_cleanup_2_6(); + aesni_cleanup_2_7(); } /* Make a decryption key from an encryption key. */ -static inline void +static ALWAYS_INLINE void do_aesni_prepare_decryption (RIJNDAEL_context *ctx) { /* The AES-NI decrypt instructions use the Equivalent Inverse @@ -447,7 +451,7 @@ _gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx) /* Encrypt one block using the Intel AES-NI instructions. Block is input * and output through SSE register xmm0. */ -static inline void +static ALWAYS_INLINE void do_aesni_enc (const RIJNDAEL_context *ctx) { #define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" @@ -500,7 +504,7 @@ do_aesni_enc (const RIJNDAEL_context *ctx) /* Decrypt one block using the Intel AES-NI instructions. Block is input * and output through SSE register xmm0. */ -static inline void +static ALWAYS_INLINE void do_aesni_dec (const RIJNDAEL_context *ctx) { #define aesdec_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t" @@ -553,7 +557,7 @@ do_aesni_dec (const RIJNDAEL_context *ctx) /* Encrypt four blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4. */ -static inline void +static ALWAYS_INLINE void do_aesni_enc_vec4 (const RIJNDAEL_context *ctx) { #define aesenc_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc8\n\t" @@ -662,7 +666,7 @@ do_aesni_enc_vec4 (const RIJNDAEL_context *ctx) /* Decrypt four blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4. 
*/ -static inline void +static ALWAYS_INLINE void do_aesni_dec_vec4 (const RIJNDAEL_context *ctx) { #define aesdec_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc8\n\t" @@ -773,7 +777,7 @@ do_aesni_dec_vec4 (const RIJNDAEL_context *ctx) /* Encrypt eight blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11. */ -static inline void +static ALWAYS_INLINE void do_aesni_enc_vec8 (const RIJNDAEL_context *ctx) { asm volatile ("movdqa (%[key]), %%xmm0\n\t" @@ -925,7 +929,7 @@ do_aesni_enc_vec8 (const RIJNDAEL_context *ctx) /* Decrypt eight blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11. */ -static inline void +static ALWAYS_INLINE void do_aesni_dec_vec8 (const RIJNDAEL_context *ctx) { asm volatile ("movdqa (%[key]), %%xmm0\n\t" @@ -1757,10 +1761,10 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks, int cbc_mac) { - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6(); + aesni_prepare_2_7(); asm volatile ("movdqu %[iv], %%xmm5\n\t" : /* No output */ @@ -1794,7 +1798,7 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv, : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); } @@ -1805,10 +1809,10 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr, { static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6(); + aesni_prepare_2_7(); asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */ "movdqa %[ctr], %%xmm5\n\t" /* Preload CTR */ @@ -1820,9 +1824,9 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr, #ifdef __x86_64__ if (nblocks >= 8) { - 
aesni_prepare_7_15_variable; + aesni_prepare_8_15_variable; - aesni_prepare_7_15(); + aesni_prepare_8_15(); for ( ;nblocks >= 8 ; nblocks -= 8 ) { @@ -1831,7 +1835,7 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr, inbuf += 8*BLOCKSIZE; } - aesni_cleanup_7_15(); + aesni_cleanup_8_15(); } #endif @@ -1848,7 +1852,7 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr, inbuf += BLOCKSIZE; } aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); } @@ -1876,10 +1880,10 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6(); + aesni_prepare_2_7(); asm volatile ("movdqu %[iv], %%xmm6\n\t" : /* No output */ @@ -1891,9 +1895,9 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv, #ifdef __x86_64__ if (nblocks >= 8) { - aesni_prepare_7_15_variable; + aesni_prepare_8_15_variable; - aesni_prepare_7_15(); + aesni_prepare_8_15(); for ( ;nblocks >= 8; nblocks -= 8) { @@ -1953,7 +1957,7 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv, inbuf += 8*BLOCKSIZE; } - aesni_cleanup_7_15(); + aesni_cleanup_8_15(); } #endif @@ -2022,7 +2026,7 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv, : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); } @@ -2031,10 +2035,10 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6(); + aesni_prepare_2_7(); if ( !ctx->decryption_prepared ) { @@ -2051,9 +2055,9 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, #ifdef __x86_64__ if (nblocks >= 8) { - aesni_prepare_7_15_variable; + aesni_prepare_8_15_variable; - aesni_prepare_7_15(); + 
aesni_prepare_8_15(); for ( ;nblocks >= 8 ; nblocks -= 8 ) { @@ -2113,7 +2117,7 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, inbuf += 8*BLOCKSIZE; } - aesni_cleanup_7_15(); + aesni_cleanup_8_15(); } #endif @@ -2187,11 +2191,175 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, : "memory"); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); } -static void +static ALWAYS_INLINE void +aesni_ocb_checksum (gcry_cipher_hd_t c, const unsigned char *plaintext, + size_t nblocks) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + + /* Calculate checksum */ + asm volatile ("movdqu %[checksum], %%xmm6\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + : + :[checksum] "m" (*c->u_ctr.ctr) + : "memory" ); + + if (0) {} +#if defined(HAVE_GCC_INLINE_ASM_AVX2) + else if (nblocks >= 16 && ctx->use_avx2) + { + /* Use wider 256-bit registers for fast xoring of plaintext. */ + asm volatile ("vzeroupper\n\t" + "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" + "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" + "vpxor %%xmm5, %%xmm5, %%xmm5\n\t" + "vpxor %%xmm7, %%xmm7, %%xmm7\n\t" + : + : + : "memory"); + + for (;nblocks >= 16; nblocks -= 16) + { + asm volatile ("vpxor %[ptr0], %%ymm6, %%ymm6\n\t" + "vpxor %[ptr1], %%ymm1, %%ymm1\n\t" + "vpxor %[ptr2], %%ymm2, %%ymm2\n\t" + "vpxor %[ptr3], %%ymm3, %%ymm3\n\t" + "vpxor %[ptr4], %%ymm0, %%ymm0\n\t" + "vpxor %[ptr5], %%ymm4, %%ymm4\n\t" + "vpxor %[ptr6], %%ymm5, %%ymm5\n\t" + "vpxor %[ptr7], %%ymm7, %%ymm7\n\t" + : + : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)), + [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)), + [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)), + [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)), + [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)), + [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)), + [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)), + [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2)) + : "memory" ); + plaintext += BLOCKSIZE * 16; + } + + asm volatile 
("vpxor %%ymm0, %%ymm6, %%ymm6\n\t" + "vpxor %%ymm4, %%ymm1, %%ymm1\n\t" + "vpxor %%ymm5, %%ymm2, %%ymm2\n\t" + "vpxor %%ymm7, %%ymm3, %%ymm3\n\t" + "vextracti128 $1, %%ymm6, %%xmm0\n\t" + "vextracti128 $1, %%ymm1, %%xmm4\n\t" + "vextracti128 $1, %%ymm2, %%xmm5\n\t" + "vextracti128 $1, %%ymm3, %%xmm7\n\t" + "vpxor %%xmm0, %%xmm6, %%xmm6\n\t" + "vpxor %%xmm4, %%xmm1, %%xmm1\n\t" + "vpxor %%xmm5, %%xmm2, %%xmm2\n\t" + "vpxor %%xmm7, %%xmm3, %%xmm3\n\t" + "vzeroupper\n\t" + : + : + : "memory" ); + } +#endif +#if defined(HAVE_GCC_INLINE_ASM_AVX) + else if (nblocks >= 16 && ctx->use_avx) + { + /* Same as AVX2, except using 256-bit floating point instructions. */ + asm volatile ("vzeroupper\n\t" + "vxorpd %%xmm0, %%xmm0, %%xmm0\n\t" + "vxorpd %%xmm4, %%xmm4, %%xmm4\n\t" + "vxorpd %%xmm5, %%xmm5, %%xmm5\n\t" + "vxorpd %%xmm7, %%xmm7, %%xmm7\n\t" + : + : + : "memory"); + + for (;nblocks >= 16; nblocks -= 16) + { + asm volatile ("vxorpd %[ptr0], %%ymm6, %%ymm6\n\t" + "vxorpd %[ptr1], %%ymm1, %%ymm1\n\t" + "vxorpd %[ptr2], %%ymm2, %%ymm2\n\t" + "vxorpd %[ptr3], %%ymm3, %%ymm3\n\t" + "vxorpd %[ptr4], %%ymm0, %%ymm0\n\t" + "vxorpd %[ptr5], %%ymm4, %%ymm4\n\t" + "vxorpd %[ptr6], %%ymm5, %%ymm5\n\t" + "vxorpd %[ptr7], %%ymm7, %%ymm7\n\t" + : + : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)), + [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)), + [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)), + [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)), + [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)), + [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)), + [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)), + [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2)) + : "memory" ); + plaintext += BLOCKSIZE * 16; + } + + asm volatile ("vxorpd %%ymm0, %%ymm6, %%ymm6\n\t" + "vxorpd %%ymm4, %%ymm1, %%ymm1\n\t" + "vxorpd %%ymm5, %%ymm2, %%ymm2\n\t" + "vxorpd %%ymm7, %%ymm3, %%ymm3\n\t" + "vextractf128 $1, %%ymm6, %%xmm0\n\t" + "vextractf128 $1, %%ymm1, %%xmm4\n\t" + "vextractf128 $1, %%ymm2, %%xmm5\n\t" + "vextractf128 $1, 
%%ymm3, %%xmm7\n\t" + "vxorpd %%xmm0, %%xmm6, %%xmm6\n\t" + "vxorpd %%xmm4, %%xmm1, %%xmm1\n\t" + "vxorpd %%xmm5, %%xmm2, %%xmm2\n\t" + "vxorpd %%xmm7, %%xmm3, %%xmm3\n\t" + "vzeroupper\n\t" + : + : + : "memory" ); + } +#endif + + for (;nblocks >= 4; nblocks -= 4) + { + asm volatile ("movdqu %[ptr0], %%xmm0\n\t" + "movdqu %[ptr1], %%xmm4\n\t" + "movdqu %[ptr2], %%xmm5\n\t" + "movdqu %[ptr3], %%xmm7\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "pxor %%xmm4, %%xmm1\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "pxor %%xmm7, %%xmm3\n\t" + : + : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE)), + [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE)), + [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE)), + [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE)) + : "memory" ); + plaintext += BLOCKSIZE * 4; + } + + for (;nblocks >= 1; nblocks -= 1) + { + asm volatile ("movdqu %[ptr0], %%xmm0\n\t" + "pxor %%xmm0, %%xmm6\n\t" + : + : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE)) + : "memory" ); + plaintext += BLOCKSIZE; + } + + asm volatile ("pxor %%xmm1, %%xmm6\n\t" + "pxor %%xmm2, %%xmm6\n\t" + "pxor %%xmm3, %%xmm6\n\t" + "movdqu %%xmm6, %[checksum]\n\t" + : [checksum] "=m" (*c->u_ctr.ctr) + : + : "memory" ); +} + + +static unsigned int NO_INLINE aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { @@ -2200,31 +2368,28 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, const unsigned char *inbuf = inbuf_arg; u64 n = c->u_mode.ocb.data_nblocks; const unsigned char *l; - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6 (); + aesni_prepare_2_7 (); + + aesni_ocb_checksum (c, inbuf_arg, nblocks); - /* Preload Offset and Checksum */ + /* Preload Offset */ asm volatile ("movdqu %[iv], %%xmm5\n\t" - "movdqu %[ctr], %%xmm6\n\t" : /* No output */ - : [iv] "m" (*c->u_iv.iv), - [ctr] "m" (*c->u_ctr.ctr) + : [iv] "m" (*c->u_iv.iv) : "memory" ); - for ( ;nblocks && n % 4; nblocks-- ) { l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - 
/* Checksum_i = Checksum_{i-1} xor P_i */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" - "pxor %%xmm0, %%xmm6\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), @@ -2246,11 +2411,11 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, #ifdef __x86_64__ if (nblocks >= 8) { - aesni_prepare_7_15_variable; + aesni_prepare_8_15_variable; - aesni_prepare_7_15(); + aesni_prepare_8_15(); - asm volatile ("movdqu %[l0], %%xmm7\n\t" + asm volatile ("movdqu %[l0], %%xmm6\n\t" : : [l0] "m" (*c->u_mode.ocb.L[0]) : "memory" ); @@ -2260,78 +2425,78 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, n += 4; l = aes_ocb_get_l(c, n); - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - /* Checksum_i = Checksum_{i-1} xor P_i */ - /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - - asm volatile ("movdqu %[l1], %%xmm10\n\t" - "movdqu %[inbuf0], %%xmm1\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm1, %%xmm6\n\t" - "pxor %%xmm5, %%xmm1\n\t" - "movdqa %%xmm5, %%xmm12\n\t" + asm volatile ("movdqu %[l0l1], %%xmm10\n\t" + "movdqu %[l0l1l0], %%xmm11\n\t" + "movdqu %[l3], %%xmm15\n\t" : - : [l1] "m" (*c->u_mode.ocb.L[1]), - [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) + : [l0l1] "m" (*c->u_mode.ocb.L0L1), + [l0l1l0] "m" (*c->u_mode.ocb.L0L1L0), + [l3] "m" (*l) : "memory" ); - asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" - "pxor %%xmm10, %%xmm5\n\t" - "pxor %%xmm2, %%xmm6\n\t" - "pxor %%xmm5, %%xmm2\n\t" - "movdqa %%xmm5, %%xmm13\n\t" + + n += 4; + l = aes_ocb_get_l(c, n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor ENCIPHER(K, C_i xor Offset_i) */ + asm volatile ("movdqu %[inbuf0], %%xmm1\n\t" + "movdqu %[inbuf1], %%xmm2\n\t" + "movdqu %[inbuf2], %%xmm3\n\t" : - : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + : [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)), + [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)), + [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); - asm 
volatile ("movdqu %[inbuf2], %%xmm3\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm3, %%xmm6\n\t" - "pxor %%xmm5, %%xmm3\n\t" - "movdqa %%xmm5, %%xmm14\n\t" + asm volatile ("movdqu %[inbuf3], %%xmm4\n\t" + "movdqu %[inbuf4], %%xmm8\n\t" + "movdqu %[inbuf5], %%xmm9\n\t" : - : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) + : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)), + [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)), + [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l3], %%xmm15\n\t" - "movdqu %[inbuf3], %%xmm4\n\t" + asm volatile ("movdqa %%xmm6, %%xmm12\n\t" + "pxor %%xmm5, %%xmm12\n\t" + "pxor %%xmm12, %%xmm1\n\t" + + "movdqa %%xmm10, %%xmm13\n\t" + "pxor %%xmm5, %%xmm13\n\t" + "pxor %%xmm13, %%xmm2\n\t" + + "movdqa %%xmm11, %%xmm14\n\t" + "pxor %%xmm5, %%xmm14\n\t" + "pxor %%xmm14, %%xmm3\n\t" + + "pxor %%xmm11, %%xmm5\n\t" "pxor %%xmm15, %%xmm5\n\t" - "pxor %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqa %%xmm5, %%xmm15\n\t" - : - : [l3] "m" (*l), - [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) - : "memory" ); - n += 4; - l = aes_ocb_get_l(c, n); - - asm volatile ("movdqu %[inbuf4], %%xmm8\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm8, %%xmm6\n\t" - "pxor %%xmm5, %%xmm8\n\t" - "movdqu %%xmm5, %[outbuf4]\n\t" - : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)) - : [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)) - : "memory" ); - asm volatile ("movdqu %[inbuf5], %%xmm9\n\t" - "pxor %%xmm10, %%xmm5\n\t" - "pxor %%xmm9, %%xmm6\n\t" - "pxor %%xmm5, %%xmm9\n\t" - "movdqu %%xmm5, %[outbuf5]\n\t" - : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)) - : [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE)) + "movdqa %%xmm5, %%xmm0\n\t" + "pxor %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm8\n\t" + "movdqu %%xmm0, %[outbuf4]\n\t" + + "movdqa %%xmm10, %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm9\n\t" + "movdqu %%xmm0, %[outbuf5]\n\t" + : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)), + [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)) + : : "memory" ); asm volatile ("movdqu %[inbuf6], 
%%xmm10\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm10, %%xmm6\n\t" - "pxor %%xmm5, %%xmm10\n\t" - "movdqu %%xmm5, %[outbuf6]\n\t" + "movdqa %%xmm11, %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm10\n\t" + "movdqu %%xmm0, %[outbuf6]\n\t" : [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)) : [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l7], %%xmm11\n\t" + asm volatile ("movdqu %[l7], %%xmm0\n\t" "pxor %%xmm11, %%xmm5\n\t" + "pxor %%xmm0, %%xmm5\n\t" "movdqu %[inbuf7], %%xmm11\n\t" - "pxor %%xmm11, %%xmm6\n\t" "pxor %%xmm5, %%xmm11\n\t" : : [l7] "m" (*l), @@ -2374,7 +2539,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, inbuf += 8*BLOCKSIZE; } - aesni_cleanup_7_15(); + aesni_cleanup_8_15(); } #endif @@ -2384,44 +2549,46 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - /* Checksum_i = Checksum_{i-1} xor P_i */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - asm volatile ("movdqu %[l0], %%xmm4\n\t" + asm volatile ("movdqu %[l0], %%xmm0\n\t" "movdqu %[inbuf0], %%xmm1\n\t" - "pxor %%xmm4, %%xmm5\n\t" - "pxor %%xmm1, %%xmm6\n\t" - "pxor %%xmm5, %%xmm1\n\t" - "movdqu %%xmm5, %[outbuf0]\n\t" - : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) + "movdqu %[l0l1], %%xmm3\n\t" + : : [l0] "m" (*c->u_mode.ocb.L[0]), + [l0l1] "m" (*c->u_mode.ocb.L0L1), [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l1], %%xmm0\n\t" - "movdqu %[inbuf1], %%xmm2\n\t" - "pxor %%xmm0, %%xmm5\n\t" - "pxor %%xmm2, %%xmm6\n\t" - "pxor %%xmm5, %%xmm2\n\t" - "movdqu %%xmm5, %[outbuf1]\n\t" - : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) - : [l1] "m" (*c->u_mode.ocb.L[1]), - [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + asm volatile ("movdqu %[l0l1l0], %%xmm4\n\t" + "movdqu %[l3], %%xmm6\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" + "movdqu %%xmm0, %[outbuf0]\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) + : [l0l1l0] "m" 
(*c->u_mode.ocb.L0L1L0), + [l3] "m" (*l) : "memory" ); - asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" - "pxor %%xmm4, %%xmm5\n\t" - "pxor %%xmm3, %%xmm6\n\t" + asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" "pxor %%xmm5, %%xmm3\n\t" - "movdqu %%xmm5, %[outbuf2]\n\t" + "pxor %%xmm3, %%xmm2\n\t" + "movdqu %%xmm3, %[outbuf1]\n\t" + : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) + : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqa %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf2], %%xmm3\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm3\n\t" + "movdqu %%xmm0, %[outbuf2]\n\t" : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)) - : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) + : + [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l3], %%xmm4\n\t" + asm volatile ("pxor %%xmm6, %%xmm5\n\t" "pxor %%xmm4, %%xmm5\n\t" "movdqu %[inbuf3], %%xmm4\n\t" - "pxor %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm4\n\t" : - : [l3] "m" (*l), - [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) + : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) : "memory" ); do_aesni_enc_vec4 (ctx); @@ -2453,12 +2620,10 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - /* Checksum_i = Checksum_{i-1} xor P_i */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" - "pxor %%xmm0, %%xmm6\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), @@ -2479,30 +2644,31 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, c->u_mode.ocb.data_nblocks = n; asm volatile ("movdqu %%xmm5, %[iv]\n\t" - "movdqu %%xmm6, %[ctr]\n\t" - : [iv] "=m" (*c->u_iv.iv), - [ctr] "=m" (*c->u_ctr.ctr) + : [iv] "=m" (*c->u_iv.iv) : : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); + + return 0; } -static void +static unsigned int NO_INLINE aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, - const void *inbuf_arg, 
size_t nblocks) + const void *inbuf_arg, size_t nblocks_arg) { RIJNDAEL_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; u64 n = c->u_mode.ocb.data_nblocks; const unsigned char *l; - aesni_prepare_2_6_variable; + size_t nblocks = nblocks_arg; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6 (); + aesni_prepare_2_7 (); if ( !ctx->decryption_prepared ) { @@ -2510,12 +2676,10 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, ctx->decryption_prepared = 1; } - /* Preload Offset and Checksum */ + /* Preload Offset */ asm volatile ("movdqu %[iv], %%xmm5\n\t" - "movdqu %[ctr], %%xmm6\n\t" : /* No output */ - : [iv] "m" (*c->u_iv.iv), - [ctr] "m" (*c->u_ctr.ctr) + : [iv] "m" (*c->u_iv.iv) : "memory" ); for ( ;nblocks && n % 4; nblocks-- ) @@ -2524,7 +2688,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ - /* Checksum_i = Checksum_{i-1} xor P_i */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" @@ -2537,7 +2700,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, do_aesni_dec (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" - "pxor %%xmm0, %%xmm6\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : @@ -2550,11 +2712,11 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, #ifdef __x86_64__ if (nblocks >= 8) { - aesni_prepare_7_15_variable; + aesni_prepare_8_15_variable; - aesni_prepare_7_15(); + aesni_prepare_8_15(); - asm volatile ("movdqu %[l0], %%xmm7\n\t" + asm volatile ("movdqu %[l0], %%xmm6\n\t" : : [l0] "m" (*c->u_mode.ocb.L[0]) : "memory" ); @@ -2564,70 +2726,78 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, n += 4; l = aes_ocb_get_l(c, n); - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ - /* Checksum_i = Checksum_{i-1} xor P_i */ - - asm 
volatile ("movdqu %[l1], %%xmm10\n\t" - "movdqu %[inbuf0], %%xmm1\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm5, %%xmm1\n\t" - "movdqa %%xmm5, %%xmm12\n\t" + asm volatile ("movdqu %[l0l1], %%xmm10\n\t" + "movdqu %[l0l1l0], %%xmm11\n\t" + "movdqu %[l3], %%xmm15\n\t" : - : [l1] "m" (*c->u_mode.ocb.L[1]), - [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) + : [l0l1] "m" (*c->u_mode.ocb.L0L1), + [l0l1l0] "m" (*c->u_mode.ocb.L0L1L0), + [l3] "m" (*l) : "memory" ); - asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" - "pxor %%xmm10, %%xmm5\n\t" - "pxor %%xmm5, %%xmm2\n\t" - "movdqa %%xmm5, %%xmm13\n\t" + + n += 4; + l = aes_ocb_get_l(c, n); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + asm volatile ("movdqu %[inbuf0], %%xmm1\n\t" + "movdqu %[inbuf1], %%xmm2\n\t" + "movdqu %[inbuf2], %%xmm3\n\t" : - : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + : [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)), + [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)), + [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm5, %%xmm3\n\t" - "movdqa %%xmm5, %%xmm14\n\t" + asm volatile ("movdqu %[inbuf3], %%xmm4\n\t" + "movdqu %[inbuf4], %%xmm8\n\t" + "movdqu %[inbuf5], %%xmm9\n\t" : - : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) + : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)), + [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)), + [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l3], %%xmm0\n\t" - "movdqu %[inbuf3], %%xmm4\n\t" - "pxor %%xmm0, %%xmm5\n\t" + asm volatile ("movdqa %%xmm6, %%xmm12\n\t" + "pxor %%xmm5, %%xmm12\n\t" + "pxor %%xmm12, %%xmm1\n\t" + + "movdqa %%xmm10, %%xmm13\n\t" + "pxor %%xmm5, %%xmm13\n\t" + "pxor %%xmm13, %%xmm2\n\t" + + "movdqa %%xmm11, %%xmm14\n\t" + "pxor %%xmm5, %%xmm14\n\t" + "pxor %%xmm14, %%xmm3\n\t" + + "pxor %%xmm11, %%xmm5\n\t" + "pxor %%xmm15, %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqa %%xmm5, %%xmm15\n\t" - : - : [l3] "m" 
(*l), - [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) - : "memory" ); - - n += 4; - l = aes_ocb_get_l(c, n); - asm volatile ("movdqu %[inbuf4], %%xmm8\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm5, %%xmm8\n\t" - "movdqu %%xmm5, %[outbuf4]\n\t" - : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)) - : [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)) - : "memory" ); - asm volatile ("movdqu %[inbuf5], %%xmm9\n\t" - "pxor %%xmm10, %%xmm5\n\t" - "pxor %%xmm5, %%xmm9\n\t" - "movdqu %%xmm5, %[outbuf5]\n\t" - : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)) - : [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE)) + "movdqa %%xmm5, %%xmm0\n\t" + "pxor %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm8\n\t" + "movdqu %%xmm0, %[outbuf4]\n\t" + + "movdqa %%xmm10, %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm9\n\t" + "movdqu %%xmm0, %[outbuf5]\n\t" + : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)), + [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)) + : : "memory" ); asm volatile ("movdqu %[inbuf6], %%xmm10\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm5, %%xmm10\n\t" - "movdqu %%xmm5, %[outbuf6]\n\t" + "movdqa %%xmm11, %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm10\n\t" + "movdqu %%xmm0, %[outbuf6]\n\t" : [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)) : [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l7], %%xmm0\n\t" - "movdqu %[inbuf7], %%xmm11\n\t" + "pxor %%xmm11, %%xmm5\n\t" "pxor %%xmm0, %%xmm5\n\t" + "movdqu %[inbuf7], %%xmm11\n\t" "pxor %%xmm5, %%xmm11\n\t" : : [l7] "m" (*l), @@ -2655,14 +2825,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, "movdqu %%xmm9, %[outbuf5]\n\t" "movdqu %%xmm10, %[outbuf6]\n\t" "movdqu %%xmm11, %[outbuf7]\n\t" - "pxor %%xmm2, %%xmm1\n\t" - "pxor %%xmm4, %%xmm1\n\t" - "pxor %%xmm9, %%xmm1\n\t" - "pxor %%xmm11, %%xmm1\n\t" - "pxor %%xmm3, %%xmm6\n\t" - "pxor %%xmm8, %%xmm6\n\t" - "pxor %%xmm10, %%xmm6\n\t" - "pxor %%xmm1, %%xmm6\n\t" : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)), [outbuf2] 
"=m" (*(outbuf + 2 * BLOCKSIZE)), @@ -2678,7 +2840,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, inbuf += 8*BLOCKSIZE; } - aesni_cleanup_7_15(); + aesni_cleanup_8_15(); } #endif @@ -2688,40 +2850,46 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ - /* Checksum_i = Checksum_{i-1} xor P_i */ - asm volatile ("movdqu %[l0], %%xmm4\n\t" + /* C_i = Offset_i xor DECIPHER(K, P_i xor Offset_i) */ + asm volatile ("movdqu %[l0], %%xmm0\n\t" "movdqu %[inbuf0], %%xmm1\n\t" - "pxor %%xmm4, %%xmm5\n\t" - "pxor %%xmm5, %%xmm1\n\t" - "movdqu %%xmm5, %[outbuf0]\n\t" - : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) + "movdqu %[l0l1], %%xmm3\n\t" + : : [l0] "m" (*c->u_mode.ocb.L[0]), + [l0l1] "m" (*c->u_mode.ocb.L0L1), [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l1], %%xmm0\n\t" - "movdqu %[inbuf1], %%xmm2\n\t" - "pxor %%xmm0, %%xmm5\n\t" - "pxor %%xmm5, %%xmm2\n\t" - "movdqu %%xmm5, %[outbuf1]\n\t" - : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) - : [l1] "m" (*c->u_mode.ocb.L[1]), - [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + asm volatile ("movdqu %[l0l1l0], %%xmm4\n\t" + "movdqu %[l3], %%xmm6\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" + "movdqu %%xmm0, %[outbuf0]\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) + : [l0l1l0] "m" (*c->u_mode.ocb.L0L1L0), + [l3] "m" (*l) : "memory" ); - asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" - "pxor %%xmm4, %%xmm5\n\t" + asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" "pxor %%xmm5, %%xmm3\n\t" - "movdqu %%xmm5, %[outbuf2]\n\t" + "pxor %%xmm3, %%xmm2\n\t" + "movdqu %%xmm3, %[outbuf1]\n\t" + : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) + : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqa %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf2], %%xmm3\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm3\n\t" + "movdqu %%xmm0, 
%[outbuf2]\n\t" : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)) - : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) + : + [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l3], %%xmm0\n\t" + asm volatile ("pxor %%xmm6, %%xmm5\n\t" + "pxor %%xmm4, %%xmm5\n\t" "movdqu %[inbuf3], %%xmm4\n\t" - "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" : - : [l3] "m" (*l), - [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) + : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) : "memory" ); do_aesni_dec_vec4 (ctx); @@ -2737,10 +2905,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, "movdqu %%xmm3, %[outbuf2]\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" - "pxor %%xmm1, %%xmm6\n\t" - "pxor %%xmm2, %%xmm6\n\t" - "pxor %%xmm3, %%xmm6\n\t" - "pxor %%xmm4, %%xmm6\n\t" : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)), [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)), [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)), @@ -2771,7 +2935,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, do_aesni_dec (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" - "pxor %%xmm0, %%xmm6\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : @@ -2783,14 +2946,16 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, c->u_mode.ocb.data_nblocks = n; asm volatile ("movdqu %%xmm5, %[iv]\n\t" - "movdqu %%xmm6, %[ctr]\n\t" - : [iv] "=m" (*c->u_iv.iv), - [ctr] "=m" (*c->u_ctr.ctr) + : [iv] "=m" (*c->u_iv.iv) : : "memory" ); + aesni_ocb_checksum (c, outbuf_arg, nblocks_arg); + aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); + + return 0; } @@ -2799,11 +2964,9 @@ _gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { if (encrypt) - aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); + return aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); else - aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); - - return 0; + return aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); } @@ -2815,10 +2978,10 @@ 
_gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, const unsigned char *abuf = abuf_arg; u64 n = c->u_mode.ocb.aad_nblocks; const unsigned char *l; - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6 (); + aesni_prepare_2_7 (); /* Preload Offset and Sum */ asm volatile ("movdqu %[iv], %%xmm5\n\t" @@ -2856,15 +3019,17 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, #ifdef __x86_64__ if (nblocks >= 8) { - aesni_prepare_7_15_variable; + aesni_prepare_8_15_variable; - aesni_prepare_7_15(); + aesni_prepare_8_15(); - asm volatile ("movdqu %[l0], %%xmm7\n\t" - "movdqu %[l1], %%xmm12\n\t" + asm volatile ("movdqu %[l0], %%xmm7\n\t" + "movdqu %[l0l1], %%xmm12\n\t" + "movdqu %[l0l1l0], %%xmm13\n\t" : : [l0] "m" (*c->u_mode.ocb.L[0]), - [l1] "m" (*c->u_mode.ocb.L[1]) + [l0l1] "m" (*c->u_mode.ocb.L0L1), + [l0l1l0] "m" (*c->u_mode.ocb.L0L1L0) : "memory" ); for ( ;nblocks >= 8 ; nblocks -= 8 ) @@ -2872,63 +3037,66 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, n += 4; l = aes_ocb_get_l(c, n); + asm volatile ("movdqu %[l3], %%xmm0\n\t" + "pxor %%xmm13, %%xmm0\n\t" + : + : [l3] "m" (*l) + : "memory" ); + + n += 4; + l = aes_ocb_get_l(c, n); + + asm volatile ("movdqu %[l7], %%xmm14\n\t" + "pxor %%xmm13, %%xmm14\n\t" + : + : [l7] "m" (*l) + : "memory" ); + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ asm volatile ("movdqu %[abuf0], %%xmm1\n\t" - "pxor %%xmm7, %%xmm5\n\t" - "pxor %%xmm5, %%xmm1\n\t" + "movdqu %[abuf1], %%xmm2\n\t" + "movdqu %[abuf2], %%xmm3\n\t" + "movdqu %[abuf3], %%xmm4\n\t" + "movdqu %[abuf4], %%xmm8\n\t" + "movdqu %[abuf5], %%xmm9\n\t" + "movdqu %[abuf6], %%xmm10\n\t" + "movdqu %[abuf7], %%xmm11\n\t" : - : [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)) + : [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)), + [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)), + [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)), + [abuf3] "m" (*(abuf + 3 * 
BLOCKSIZE)), + [abuf4] "m" (*(abuf + 4 * BLOCKSIZE)), + [abuf5] "m" (*(abuf + 5 * BLOCKSIZE)), + [abuf6] "m" (*(abuf + 6 * BLOCKSIZE)), + [abuf7] "m" (*(abuf + 7 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[abuf1], %%xmm2\n\t" - "pxor %%xmm12, %%xmm5\n\t" + asm volatile ("pxor %%xmm7, %%xmm1\n\t" + "pxor %%xmm5, %%xmm1\n\t" + + "pxor %%xmm12, %%xmm2\n\t" "pxor %%xmm5, %%xmm2\n\t" - : - : [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)) - : "memory" ); - asm volatile ("movdqu %[abuf2], %%xmm3\n\t" - "pxor %%xmm7, %%xmm5\n\t" + + "pxor %%xmm13, %%xmm3\n\t" "pxor %%xmm5, %%xmm3\n\t" - : - : [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)) - : "memory" ); - asm volatile ("movdqu %[l3], %%xmm0\n\t" - "movdqu %[abuf3], %%xmm4\n\t" + "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" - : - : [l3] "m" (*l), - [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)) - : "memory" ); - n += 4; - l = aes_ocb_get_l(c, n); - - asm volatile ("movdqu %[abuf4], %%xmm8\n\t" - "pxor %%xmm7, %%xmm5\n\t" + "pxor %%xmm7, %%xmm8\n\t" "pxor %%xmm5, %%xmm8\n\t" - : - : [abuf4] "m" (*(abuf + 4 * BLOCKSIZE)) - : "memory" ); - asm volatile ("movdqu %[abuf5], %%xmm9\n\t" - "pxor %%xmm12, %%xmm5\n\t" + + "pxor %%xmm12, %%xmm9\n\t" "pxor %%xmm5, %%xmm9\n\t" - : - : [abuf5] "m" (*(abuf + 5 * BLOCKSIZE)) - : "memory" ); - asm volatile ("movdqu %[abuf6], %%xmm10\n\t" - "pxor %%xmm7, %%xmm5\n\t" + + "pxor %%xmm13, %%xmm10\n\t" "pxor %%xmm5, %%xmm10\n\t" - : - : [abuf6] "m" (*(abuf + 6 * BLOCKSIZE)) - : "memory" ); - asm volatile ("movdqu %[l7], %%xmm0\n\t" - "movdqu %[abuf7], %%xmm11\n\t" - "pxor %%xmm0, %%xmm5\n\t" + + "pxor %%xmm14, %%xmm5\n\t" "pxor %%xmm5, %%xmm11\n\t" : - : [l7] "m" (*l), - [abuf7] "m" (*(abuf + 7 * BLOCKSIZE)) + : : "memory" ); do_aesni_enc_vec8 (ctx); @@ -2948,7 +3116,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, abuf += 8*BLOCKSIZE; } - aesni_cleanup_7_15(); + aesni_cleanup_8_15(); } #endif @@ -2959,36 +3127,41 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void 
*abuf_arg, /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ - asm volatile ("movdqu %[l0], %%xmm4\n\t" + asm volatile ("movdqu %[l0], %%xmm0\n\t" "movdqu %[abuf0], %%xmm1\n\t" - "pxor %%xmm4, %%xmm5\n\t" - "pxor %%xmm5, %%xmm1\n\t" + "movdqu %[l0l1], %%xmm3\n\t" : : [l0] "m" (*c->u_mode.ocb.L[0]), + [l0l1] "m" (*c->u_mode.ocb.L0L1), [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l1], %%xmm0\n\t" - "movdqu %[abuf1], %%xmm2\n\t" - "pxor %%xmm0, %%xmm5\n\t" - "pxor %%xmm5, %%xmm2\n\t" + asm volatile ("movdqu %[l0l1l0], %%xmm4\n\t" + "movdqu %[l3], %%xmm7\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" : - : [l1] "m" (*c->u_mode.ocb.L[1]), - [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)) + : [l0l1l0] "m" (*c->u_mode.ocb.L0L1L0), + [l3] "m" (*l) : "memory" ); - asm volatile ("movdqu %[abuf2], %%xmm3\n\t" - "pxor %%xmm4, %%xmm5\n\t" + asm volatile ("movdqu %[abuf1], %%xmm2\n\t" "pxor %%xmm5, %%xmm3\n\t" + "pxor %%xmm3, %%xmm2\n\t" : - : [l2] "m" (*c->u_mode.ocb.L[0]), - [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)) + : [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)) : "memory" ); - asm volatile ("movdqu %[l3], %%xmm0\n\t" + asm volatile ("movdqa %%xmm4, %%xmm0\n\t" + "movdqu %[abuf2], %%xmm3\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm3\n\t" + : + : [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)) + : "memory" ); + asm volatile ("pxor %%xmm7, %%xmm5\n\t" + "pxor %%xmm4, %%xmm5\n\t" "movdqu %[abuf3], %%xmm4\n\t" - "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" : - : [l3] "m" (*l), - [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)) + : [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)) : "memory" ); do_aesni_enc_vec4 (ctx); @@ -3038,7 +3211,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); return 0; } @@ -3053,10 +3226,10 @@ _gcry_aes_aesni_xts_enc (RIJNDAEL_context *ctx, unsigned char *tweak, unsigned char *outbuf, const 
unsigned char *inbuf, size_t nblocks) { - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6 (); + aesni_prepare_2_7 (); /* Preload Tweak */ asm volatile ("movdqu %[tweak], %%xmm5\n\t" @@ -3182,7 +3355,7 @@ _gcry_aes_aesni_xts_enc (RIJNDAEL_context *ctx, unsigned char *tweak, : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); } @@ -3191,10 +3364,10 @@ _gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { - aesni_prepare_2_6_variable; + aesni_prepare_2_7_variable; aesni_prepare (); - aesni_prepare_2_6 (); + aesni_prepare_2_7 (); if ( !ctx->decryption_prepared ) { @@ -3326,7 +3499,7 @@ _gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak, : "memory" ); aesni_cleanup (); - aesni_cleanup_2_6 (); + aesni_cleanup_2_7 (); } diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h index 160fb8c..876d55f 100644 --- a/cipher/rijndael-internal.h +++ b/cipher/rijndael-internal.h @@ -143,6 +143,8 @@ typedef struct RIJNDAEL_context_s #endif /*USE_PADLOCK*/ #ifdef USE_AESNI unsigned int use_aesni:1; /* AES-NI shall be used. */ + unsigned int use_avx:1; /* AVX shall be used. */ + unsigned int use_avx2:1; /* AVX2 shall be used. */ #endif /*USE_AESNI*/ #ifdef USE_SSSE3 unsigned int use_ssse3:1; /* SSSE3 shall be used. 
*/ diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 1bc8b0f..8094537 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -334,6 +334,8 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_aesni = 1; + ctx->use_avx = !!(hwfeatures & HWF_INTEL_AVX); + ctx->use_avx2 = !!(hwfeatures & HWF_INTEL_AVX2); if (hd) { hd->bulk.cfb_enc = _gcry_aes_aesni_cfb_enc; diff --git a/tests/basic.c b/tests/basic.c index f3d8951..0afae30 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -4411,11 +4411,114 @@ do_check_ocb_cipher (int inplace) "1792A4E31E0755FB03E31B22116E6C2DDF9EFD6E33D536F1" "A0124B0A55BAE884ED93481529C76B6AD0C515F4D1CDD4FD" "AC4F02AA" + }, + { GCRY_CIPHER_AES, 12, "0F0E0D0C0B0A09080706050403020100", + "BBAA9988776655443322110D", + "000102030405060708090A0B0C0D0E0F1011121314151617" + "18191A1B1C1D1E1F2021222324252627", + /* test vector for checksumming */ + "01000000000000000000000000000000" + "02000000000000000000000000000000" + "04000000000000000000000000000000" + "08000000000000000000000000000000" + "10000000000000000000000000000000" + "20000000000000000000000000000000" + "40000000000000000000000000000000" + "80000000000000000000000000000000" + "00010000000000000000000000000000" + "00020000000000000000000000000000" + "00040000000000000000000000000000" + "00080000000000000000000000000000" + "00100000000000000000000000000000" + "00200000000000000000000000000000" + "00400000000000000000000000000000" + "00800000000000000000000000000000" + "00000100000000000000000000000000" + "00000200000000000000000000000000" + "00000400000000000000000000000000" + "00000800000000000000000000000000" + "00001000000000000000000000000000" + "00002000000000000000000000000000" + "00004000000000000000000000000000" + "00008000000000000000000000000000" + "00000001000000000000000000000000" + "00000002000000000000000000000000" + "00000004000000000000000000000000" + 
"00000008000000000000000000000000" + "00000010000000000000000000000000" + "00000020000000000000000000000000" + "00000040000000000000000000000000" + "00000080000000000000000000000000" + "00000000010000000000000000000000" + "00000000020000000000000000000000" + "00000000040000000000000000000000" + "00000000080000000000000000000000" + "00000000100000000000000000000000" + "00000000200000000000000000000000" + "00000000400000000000000000000000" + "00000000800000000000000000000000" + "00000000000100000000000000000000" + "00000000000200000000000000000000" + "00000000000400000000000000000000" + "00000000000800000000000000000000" + "00000000001000000000000000000000" + "00000000002000000000000000000000" + "00000000004000000000000000000000" + "00000000008000000000000000000000", + "01105c6e36f6ac480f022c51e31ed702" + "90fda4b7b783194d4b4be8e4e1e2dff4" + "6a0804d1c5f9f808ea7933e31c063233" + "2bf65a22b20bb13cde3b80b3682ba965" + "b1207c58916f7856fa9968b410e50dee" + "98b35c071163d1b352b9bbccd09fde29" + "b850f40e71a8ae7d2e2d577f5ee39c46" + "7fa28130b50a123c29958e4665dda9a5" + "e0793997f8f19633a96392141d6e0e88" + "77850ed4364065d1d2f8746e2f1d5fd1" + "996cdde03215306503a30e41f58ef3c4" + "400365cfea4fa6381157c12a46598edf" + "18604854462ec66e3d3cf26d4723cb6a" + "9d801095048086a606fdb9192760889b" + "a8ce2e70e1b55a469137a9e2e6734565" + "283cb1e2c74f37e0854d03e33f8ba499" + "ef5d9af4edfce077c6280338f0a64286" + "2e6bc27ebd5a4c91b3778e22631251c8" + "c5bb75a10945597a9d6c274fc82d3338" + "b403a0a549d1375f26e71ef22bce0941" + "93ea87e2ed72fce0546148c351eec3be" + "867bb1b96070c377fff3c98e21562beb" + "475cfe28abcaaedf49981f6599b15140" + "ea6130d24407079f18ba9d4a8960b082" + "b39c57320e2e064f02fde88c23112146" + "1cac3655868aef584714826ee4f361fb" + "e6d692e1589cbb9dd3c74fa628df2a1f" + "3b0029b1d62b7e9978013ed3c793c1dd" + "1f184c8f7022a853cac40b74ac749aa3" + "f33f0d14732dfda0f2c3c20591bf1f5a" + "710ec0d0bca342baa5146068a78ff58c" + "66316312b7a98af35a0f4e92799b4047" + "f047ae61f25c28d232ce5c168cc745d6" + 
"6da13cb0f9e38a696635dba7a21571cf" + "cd64ec8cc33db7879f59a90d9edd00f6" + "a899e39ab36b9269a3ac04ebad9326bf" + "53cd9b400168a61714cd628a4056d236" + "bd8622c76daa54cb65f5db2fe03bafbe" + "0b23549ae31136f607293e8093a21934" + "74fd5e9c2451b4c8e0499e6ad34fafc8" + "ab77722a282f7f84b14ddebf7e696300" + "c1ef92d4a0263c6cca104530f996e272" + "f58992ff68d642b071a5848dc4acf2ae" + "28fb1f27ae0f297d5136a7a0a4a03e89" + "b588755b8217a1c62773790e69261269" + "19f45daf7b3ccf18e3fc590a9a0e172f" + "033ac4d13c3decc4c62d7de718ace802" + "140452dc850989f6762e3578bbb04be3" + "1a237c599c4649f4e586b2de" } }; gpg_error_t err = 0; gcry_cipher_hd_t hde, hdd; - unsigned char out[MAX_DATA_LEN]; + unsigned char out[1024]; unsigned char tag[16]; int tidx; @@ -4548,7 +4651,7 @@ do_check_ocb_cipher (int inplace) } else { - err = gcry_cipher_encrypt (hde, out, MAX_DATA_LEN, + err = gcry_cipher_encrypt (hde, out, sizeof(out), plain, plainlen); } } @@ -4605,7 +4708,7 @@ do_check_ocb_cipher (int inplace) } else { - unsigned char tmp[MAX_DATA_LEN]; + unsigned char tmp[sizeof(out)]; memcpy(tmp, out, plainlen); err = gcry_cipher_decrypt (hdd, out, plainlen, tmp, plainlen); @@ -4696,7 +4799,7 @@ check_ocb_cipher_largebuf_split (int algo, int keylen, const char *tagexpect, } for (i = 0; i < buflen; i++) - inbuf[i] = 'a'; + inbuf[i] = (i + 181081) * 5039; err = gcry_cipher_open (&hde, algo, GCRY_CIPHER_MODE_OCB, 0); if (!err) @@ -4855,6 +4958,131 @@ out_free: static void +check_ocb_cipher_checksum (int algo, int keylen) +{ + static const unsigned char key[32] = + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"; + static const unsigned char nonce[12] = + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x00\x01\x02\x03"; + const size_t buflen = 128 * 16; + unsigned char *inbuf, *outbuf; + gpg_error_t err = 0; + gcry_cipher_hd_t hde, hde2; + unsigned char tag[16]; + unsigned char tag2[16]; + int i; + + inbuf = xmalloc(buflen); + if (!inbuf) + 
{ + fail ("out-of-memory\n"); + return; + } + outbuf = xmalloc(buflen); + if (!inbuf) + { + fail ("out-of-memory\n"); + xfree(inbuf); + return; + } + + memset(inbuf, 0, buflen); + for (i = 0; i < 128; i += 16) + { + unsigned char *blk = inbuf + i; + int bit2set = i / 16; + int byteidx = bit2set / 8; + int bitpos = bit2set % 8; + + blk[byteidx] |= 1 << bitpos; + } + + err = gcry_cipher_open (&hde, algo, GCRY_CIPHER_MODE_OCB, 0); + if (!err) + err = gcry_cipher_open (&hde2, algo, GCRY_CIPHER_MODE_OCB, 0); + if (err) + { + fail ("cipher-ocb, gcry_cipher_open failed (checksum, algo %d): %s\n", + algo, gpg_strerror (err)); + goto out_free; + } + + err = gcry_cipher_setkey (hde, key, keylen); + if (!err) + err = gcry_cipher_setkey (hde2, key, keylen); + if (err) + { + fail ("cipher-ocb, gcry_cipher_setkey failed (checksum, algo %d): %s\n", + algo, gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hde2); + goto out_free; + } + + err = gcry_cipher_setiv (hde, nonce, 12); + if (!err) + err = gcry_cipher_setiv (hde2, nonce, 12); + if (err) + { + fail ("cipher-ocb, gcry_cipher_setiv failed (checksum, algo %d): %s\n", + algo, gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hde2); + goto out_free; + } + + err = gcry_cipher_final (hde); + if (!err) + { + err = gcry_cipher_encrypt (hde, outbuf, buflen, inbuf, buflen); + } + for (i = 0; i < buflen && !err; i += 16) + { + if (i + 16 == buflen) + err = gcry_cipher_final (hde2); + if (!err) + err = gcry_cipher_encrypt (hde2, outbuf + i, 16, inbuf + i, 16); + } + + if (err) + { + fail ("cipher-ocb, gcry_cipher_encrypt failed (checksum, algo %d): %s\n", + algo, gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hde2); + goto out_free; + } + + /* Check that the tag matches. 
*/ + err = gcry_cipher_gettag (hde, tag, 16); + if (err) + { + fail ("cipher_ocb, gcry_cipher_gettag failed (checksum, algo %d): %s\n", + algo, gpg_strerror (err)); + } + err = gcry_cipher_gettag (hde2, tag2, 16); + if (err) + { + fail ("cipher_ocb, gcry_cipher_gettag failed (checksum2, algo %d): %s\n", + algo, gpg_strerror (err)); + } + if (memcmp (tag, tag2, 16)) + { + mismatch (tag, 16, tag2, 16); + fail ("cipher-ocb, encrypt tag mismatch (checksum, algo %d)\n", algo); + } + + gcry_cipher_close (hde); + gcry_cipher_close (hde2); + +out_free: + xfree(inbuf); + xfree(outbuf); +} + + +static void check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect) { unsigned int split; @@ -4863,6 +5091,8 @@ check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect) { check_ocb_cipher_largebuf_split(algo, keylen, tagexpect, split); } + + check_ocb_cipher_checksum(algo, keylen); } @@ -5108,35 +5338,25 @@ check_ocb_cipher (void) /* Check large buffer encryption/decryption. */ check_ocb_cipher_largebuf(GCRY_CIPHER_AES, 16, - "\xf5\xf3\x12\x7d\x58\x2d\x96\xe8" - "\x33\xfd\x7a\x4f\x42\x60\x5d\x20"); + "\xc1\x5b\xf1\x80\xa4\xd5\xea\xfd\xae\x17\xa6\xcd\x6b\x10\xa8\xea"); check_ocb_cipher_largebuf(GCRY_CIPHER_AES256, 32, - "\xfa\x26\xa5\xbf\xf6\x7d\x3a\x8d" - "\xfe\x96\x67\xc9\xc8\x41\x03\x51"); + "\x2b\xb7\x25\x6b\x77\xc7\xfb\x21\x5c\xc9\x6c\x36\x17\x1a\x1a\xd5"); check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA128, 16, - "\x28\x23\x38\x45\x2b\xfd\x42\x45" - "\x43\x64\x7e\x67\x7f\xf4\x8b\xcd"); + "\xe0\xae\x3f\x29\x3a\xee\xd8\xe3\xf2\x20\xc1\xa2\xd8\x72\x12\xd9"); check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA192, 24, - "\xee\xca\xe5\x39\x27\x2d\x33\xe7" - "\x79\x74\xb0\x1d\x37\x12\xd5\x6c"); + "\xd7\x98\x71\xcf\x19\x5c\xa3\x3d\x6c\xfc\xc9\xbe\x9f\x13\x6b\xbd"); check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA256, 32, - "\x39\x39\xd0\x2d\x05\x68\x74\xee" - "\x18\x6b\xea\x3d\x0b\xd3\x58\xae"); + 
"\x03\xf6\xec\x1a\x0e\xae\x66\x24\x2b\xba\x26\x0f\xb3\xb3\x1f\xb9"); check_ocb_cipher_largebuf(GCRY_CIPHER_TWOFISH, 16, - "\x63\xe3\x0e\xb9\x11\x6f\x14\xba" - "\x79\xe4\xa7\x9e\xad\x3c\x02\x0c"); + "\x1c\xf9\xc7\xfc\x3a\x32\xac\xc7\x5e\x0a\xc2\x5c\x90\xd6\xf6\xf9"); check_ocb_cipher_largebuf(GCRY_CIPHER_TWOFISH, 32, - "\xf6\xd4\xfe\x4e\x50\x85\x13\x59" - "\x69\x0e\x4c\x67\x3e\xdd\x47\x90"); + "\x53\x02\xc8\x0d\x4e\x9a\x44\x9e\x43\xd4\xaa\x06\x30\x93\xcc\x16"); check_ocb_cipher_largebuf(GCRY_CIPHER_SERPENT128, 16, - "\x3c\xfb\x66\x14\x3c\xc8\x6c\x67" - "\x26\xb8\x23\xeb\xaf\x43\x98\x69"); + "\xd3\x64\xac\x40\x48\x88\x77\xe2\x41\x26\x4c\xde\x21\x29\x21\x8d"); check_ocb_cipher_largebuf(GCRY_CIPHER_SERPENT192, 24, - "\x5e\x62\x27\xc5\x32\xc3\x1d\xe6" - "\x2e\x65\xe7\xd6\xfb\x05\xd7\xb2"); + "\x99\xeb\x35\xb0\x62\x4e\x7b\xf1\x5e\x9f\xed\x32\x78\x90\x0b\xd0"); check_ocb_cipher_largebuf(GCRY_CIPHER_SERPENT256, 32, - "\xe7\x8b\xe6\xd4\x2f\x7a\x36\x4c" - "\xba\xee\x20\xe2\x68\xf4\xcb\xcc"); + "\x71\x66\x2f\x68\xbf\xdd\xcc\xb1\xbf\x81\x56\x5f\x01\x73\xeb\x44"); /* Check that the AAD data is correctly buffered. 
*/ check_ocb_cipher_splitaad (); ----------------------------------------------------------------------- Summary of changes: cipher/cipher-gcm-intel-pclmul.c | 7 + cipher/cipher-internal.h | 4 +- cipher/cipher-ocb.c | 11 + cipher/crc-intel-pclmul.c | 7 + cipher/rijndael-aesni.c | 878 +++++++++++++++++++++++---------------- cipher/rijndael-internal.h | 2 + cipher/rijndael-ssse3-amd64.c | 7 + cipher/rijndael.c | 2 + cipher/sha1-intel-shaext.c | 7 + cipher/sha256-intel-shaext.c | 7 + tests/basic.c | 268 ++++++++++-- 11 files changed, 826 insertions(+), 374 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From garcia.yann at gmail.com Sun Nov 25 13:45:42 2018 From: garcia.yann at gmail.com (Yann Garcia) Date: Sun, 25 Nov 2018 13:45:42 +0100 Subject: Trouble to verify ECDSA signature, NIST P-256 Message-ID: Dear All, I got issue (gcrypt/Not implementedgcrypt/Not implemented) will verifying a signature. I have the public key, the signature and the curve. I proceed in the following way: 1) Build public key q=hex_to_data("AC529F186F485D194EBE3677EA9FD1D7E7280648081A01686B3E78528D8AA5C6DC44DB3E54EEF45BA7EE989572D1DC0F83FF071E30B1EE5972D52D22D204A0AD", &q_size); if ((rc = gcry_sexp_build (&public_key, NULL, "(ecc(curve \"NIST P-256\")(q %b))\n", q_size, q)) != 0) { printf("Failed for %s/%s\n", gcry_strsource(rc), gcry_strerror(rc)); .... } 2) Create a context based on the public key if (0 != (rc = gcry_mpi_ec_new(&ctx, public_key, "NIST P-256"))) { printf("Failed for %s/%s\n", gcry_strsource(rc), gcry_strerror(rc)); .... } And here was the issue: I got the error message "gcrypt/Not implemented" Can anyone provide me some hints to fix it, I have no idea how to proceed here? 
Many thanks in advance for your help, Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)7 61 00 77 05 Email: *yann.garcia at fscom.fr* Yann.Garcia_EXT at etsi.org Skype: yann.garcia Google+: garcia.yann at gmail.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From gniibe at fsij.org Mon Nov 26 00:47:31 2018 From: gniibe at fsij.org (NIIBE Yutaka) Date: Mon, 26 Nov 2018 08:47:31 +0900 Subject: Trouble to verify ECDSA signature, NIST P-256 In-Reply-To: References: Message-ID: <87a7lwoh2k.fsf@iwagami.gniibe.org> Yann Garcia wrote: > q=hex_to_data("AC529F186F485D194EBE3677EA9FD1D7E7280648081A01686B3E78528D8AA5C6DC44DB3E54EEF45BA7EE989572D1DC0F83FF071E30B1EE5972D52D22D204A0AD", I think this representation is... [...] > And here was the issue: I got the error message "gcrypt/Not implemented" ... not supported by libgcrypt. For NIST P-256, only standard encoding (prefixed by 0x04, followed by X in MPI, then, Y in MPI [0]) is supported. If not, it results an error in _gcry_ecc_os2ec by GPG_ERR_NOT_IMPLEMENTED (Not implemented). Just put the prefix "04" to your representation, if it's composed by X and Y in big endian. [0] http://www.secg.org/sec1-v2.pdf SEC 1: Elliptic Curve Cryptography (Version 2.0) 2.3.3 Elliptic-Curve-Point-to-Octet-String Conversion -- From garcia.yann at gmail.com Mon Nov 26 08:26:09 2018 From: garcia.yann at gmail.com (Yann Garcia) Date: Mon, 26 Nov 2018 08:26:09 +0100 Subject: Trouble to verify ECDSA signature, NIST P-256 In-Reply-To: <87a7lwoh2k.fsf@iwagami.gniibe.org> References: <87a7lwoh2k.fsf@iwagami.gniibe.org> Message-ID: Hello, Oh yes, forgot it, sorry! In my sample, q is the public key. 
Thanks a lot Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)7 61 00 77 05 Email: *yann.garcia at fscom.fr* Yann.Garcia_EXT at etsi.org Skype: yann.garcia Google+: garcia.yann at gmail.com On Mon, 26 Nov 2018 at 00:47, NIIBE Yutaka wrote: > Yann Garcia wrote: > > > q=hex_to_data("AC529F186F485D194EBE3677EA9FD1D7E7280648081A01686B3E78528D8AA5C6DC44DB3E54EEF45BA7EE989572D1DC0F83FF071E30B1EE5972D52D22D204A0AD", > > I think this representation is... > > [...] > > And here was the issue: I got the error message "gcrypt/Not implemented" > > ... not supported by libgcrypt. > > For NIST P-256, only standard encoding (prefixed by 0x04, followed by X > in MPI, then, Y in MPI [0]) is supported. If not, it results an error > in _gcry_ecc_os2ec by GPG_ERR_NOT_IMPLEMENTED (Not implemented). > > Just put the prefix "04" to your representation, if it's composed by X > and Y in big endian. > > [0] http://www.secg.org/sec1-v2.pdf > SEC 1: Elliptic Curve Cryptography (Version 2.0) > 2.3.3 Elliptic-Curve-Point-to-Octet-String Conversion > -- > -------------- next part -------------- An HTML attachment was scrubbed... URL: From garcia.yann at gmail.com Wed Nov 28 11:17:49 2018 From: garcia.yann at gmail.com (Yann Garcia) Date: Wed, 28 Nov 2018 11:17:49 +0100 Subject: Nist P256: How to calculate Y public key when knowing X and the LSB bit of Y public key Message-ID: Hello, I wrote some code to generate an ECDSA S-exp (X, Y) public keys based of an existing compressed key. 
This compressed key is formatted like this: (02||X public key), 02 indicating that the Y key is even (03||X public key), 03 indicating that the Y key is odd I tried my code with this NIST P-256 public key pair generated also using libgcrypt: Private key := 'D418760F0CB2DCB856BC3C7217AD3AA36DB6742AE1DB655A3D28DF88CBBF84E1'O; Public key X := ' *EE9CC7FBD9EDECEA41F7C8BD258E8D2E988E75BD069ADDCA1E5A38E534AC6818*'O; Public key Y := '5AE3C8D9FE0B1FC7438F29417C240F8BF81C358EC1A4D0C6E98D8EDBCC714017'O; Public compressed key := ' *03EE9CC7FBD9EDECEA41F7C8BD258E8D2E988E75BD069ADDCA1E5A38E534AC6818*'O; My issue is that the Y key I calculated is not correct. I made a mistake somewhere but I cannot find it :( Many thanks for taking the time to verify my code. Here is my code, based on https://en.wikipedia.org/wiki/Quadratic_residue: 1) I convert the x_buffer (containing the public key X) into x, a gcry_mpi_t value if ((rc = gcry_sexp_build(&e_key, NULL, "(e-key(x %b))", buffer_size, x_buffer)) != 0) { ... } if ((x = gcry_sexp_nth_mpi(gcry_sexp_find_token(e_key, "x", 0), 1, GCRYMPI_FMT_USG)) == NULL) { ... } 2) Here, x really contains my public key: output: x= : 00EE9CC7FBD9EDECEA41F7C8BD258E8D2E988E75BD069ADDCA1E5A38E534AC6818 3) The Ecc curve equation is: y^2=x^3+a*x+b, with a and b specific to the NIST P-256 elliptic curve I want to calculate y^2: two = gcry_mpi_set_ui (NULL, 2); three = gcry_mpi_set_ui (NULL, 3); four = gcry_mpi_set_ui (NULL, 4); x_3 = gcry_mpi_new (0); axb = gcry_mpi_new (0); y_2 = gcry_mpi_new (0); gcry_mpi_powm (x_3, x, three, p); // w = b^e \bmod m. 
gcry_mpi_mulm (axb, a, x, p); gcry_mpi_addm (axb, axb, b, p); gcry_mpi_addm (y_2, x_3, axb, p); 4) Here y_2 contains the result of x^3+a*x+b output: y_2: 00E2BC9B1E5CB40472C271A5FAB056FA5D821591027481894A50B1ADEA18A6ABF0 5) I'm going to calculate sqrt(y^2): two solutions: y = y_2^((p + 1) / 4) mod p or y = y_2^((p - 3) / 4) mod p q = gcry_mpi_new (0); r = gcry_mpi_new (0); y = gcry_mpi_new (0); if (p_comp_mode == 0x02) { // Y key is even /* Solution one: y = p + 1 / 4 */ p_plus_1 = gcry_mpi_new (0); gcry_mpi_add_ui(p_plus_1, p, 1); gcry_mpi_div(q, r, p_plus_1, four, 0); gcry_mpi_release(p_plus_1); } else { // Y key is odd /* Solution two: p - 3 / 4 */ p_minus_3 = gcry_mpi_new (0); gcry_mpi_sub_ui(p_minus_3, p, 3); gcry_mpi_div(q, r, p_minus_3, four, 0); gcry_mpi_release(p_minus_3); } gcry_mpi_powm(y, y_2, q, p); show_mpi("y", "", y); 6) Here y contains the Y public key: output: y: 3ED29A0C723BDE987C0D4DE143FB7781F476AA385D71E42C66BF5F019F850F3E 7) This is not what I expected; it should be: '5AE3C8D9FE0B1FC7438F29417C240F8BF81C358EC1A4D0C6E98D8EDBCC714017'O; Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)7 61 00 77 05 Email: *yann.garcia at fscom.fr* Yann.Garcia_EXT at etsi.org Skype: yann.garcia Google+: garcia.yann at gmail.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From garcia.yann at gmail.com Fri Nov 30 06:49:50 2018 From: garcia.yann at gmail.com (Yann Garcia) Date: Fri, 30 Nov 2018 06:49:50 +0100 Subject: Trouble to verify ECDSA signature, NIST P-256 In-Reply-To: References: <87a7lwoh2k.fsf@iwagami.gniibe.org> Message-ID: Hello, Thanks a lot, it works fine now. 
Best regards, Yann Garcia Senior Software Engineer Microsoft MCAD.net Certified ************************************** FSCOM SARL Le Montespan B2 6, Avenue des Alpes F-06600 Antibes, FRANCE ************************************************ Tel: +33 (0)4 92 94 49 08 Mobile: +33 (0)7 61 00 77 05 Email: *yann.garcia at fscom.fr* Yann.Garcia_EXT at etsi.org Skype: yann.garcia Google+: garcia.yann at gmail.com On Mon, 26 Nov 2018 at 08:26, Yann Garcia wrote: > > Hello, > > Oh yes, forgot it, sorry! > In my sample, q is the public key. > > Thanks a lot > Best regards, > > Yann Garcia > Senior Software Engineer > Microsoft MCAD.net Certified > ************************************** > FSCOM SARL > Le Montespan B2 > 6, > > Avenue des Alpes > > > F-06600 Antibes, FRANCE > ************************************************ > Tel: +33 (0)4 92 94 49 08 > Mobile: +33 (0)7 61 00 77 05 > Email: *yann.garcia at fscom.fr* > Yann.Garcia_EXT at etsi.org > Skype: yann.garcia > Google+: garcia.yann at gmail.com > > > On Mon, 26 Nov 2018 at 00:47, NIIBE Yutaka wrote: > Yann Garcia wrote: > > > q=hex_to_data("AC529F186F485D194EBE3677EA9FD1D7E7280648081A01686B3E78528D8AA5C6DC44DB3E54EEF45BA7EE989572D1DC0F83FF071E30B1EE5972D52D22D204A0AD", > > I think this representation is... > > [...] > > And here was the issue: I got the error message "gcrypt/Not implemented" > > ... not supported by libgcrypt. > > For NIST P-256, only standard encoding (prefixed by 0x04, followed by X > in MPI, then, Y in MPI [0]) is supported. If not, it results an error > in _gcry_ecc_os2ec by GPG_ERR_NOT_IMPLEMENTED (Not implemented). > > Just put the prefix "04" to your representation, if it's composed by X > and Y in big endian. > > [0] http://www.secg.org/sec1-v2.pdf > SEC 1: Elliptic Curve Cryptography (Version 2.0) > 2.3.3 Elliptic-Curve-Point-to-Octet-String Conversion > -- > -------------- next part -------------- An HTML attachment was scrubbed... URL: