[PATCH v2 1/2] bufhelp: use one-byte aligned type for unaligned memory accesses
Jussi Kivilinna
jussi.kivilinna at iki.fi
Wed Mar 11 18:04:46 CET 2015
* cipher/bufhelp.h (BUFHELP_FAST_UNALIGNED_ACCESS): Enable only when
HAVE_GCC_ATTRIBUTE_PACKED and HAVE_GCC_ATTRIBUTE_ALIGNED are defined.
(bufhelp_int_t): New type.
(buf_cpy, buf_xor, buf_xor_1, buf_xor_2dst, buf_xor_n_copy_2): Use
'bufhelp_int_t'.
* configure.ac (gcry_cv_gcc_attribute_packed): New.
--
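Switching from plain 'uintptr_t *' casts to a packed, one-byte-aligned
struct type makes the unaligned accesses explicit to the compiler: GCC
then knows the object may live at any address and emits whatever
unaligned load/store sequence is fastest for the target, instead of the
code relying on a misaligned pointer dereference, which is undefined
behaviour. A minimal stand-alone sketch of the technique follows (the
names demo_int_s/demo_int_t and the test buffer are illustrative, not
part of the patch):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Packed, one-byte-aligned wrapper; dereferencing a pointer to this
   type is well-defined at any address. */
typedef struct demo_int_s
{
  uintptr_t a;
} __attribute__((packed, aligned(1))) demo_int_t;

int
main (void)
{
  unsigned char buf[1 + sizeof(uintptr_t)];
  uintptr_t v = (uintptr_t)0x01234567UL;

  memset (buf, 0, sizeof(buf));
  /* Store and reload at &buf[1], an address that is misaligned for
     uintptr_t. */
  ((demo_int_t *)(void *)&buf[1])->a = v;
  printf ("%#lx\n", (unsigned long)((const demo_int_t *)(const void *)&buf[1])->a);
  return 0;
}

On targets without fast unaligned access the same construct is still
correct; the compiler just falls back to byte-wise accesses, which is
why the fast path below stays restricted to the listed architectures.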
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/bufhelp.h | 147 +++++++++++++++++++++++++++++++++---------------------
configure.ac | 18 +++++++
2 files changed, 108 insertions(+), 57 deletions(-)
diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h
index a372acb..252d3bc 100644
--- a/cipher/bufhelp.h
+++ b/cipher/bufhelp.h
@@ -33,10 +33,13 @@
#include "bithelp.h"
-#if defined(__i386__) || defined(__x86_64__) || \
- defined(__powerpc__) || defined(__powerpc64__) || \
- (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
- defined(__aarch64__)
+#undef BUFHELP_FAST_UNALIGNED_ACCESS
+#if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \
+ defined(HAVE_GCC_ATTRIBUTE_ALIGNED) && \
+ (defined(__i386__) || defined(__x86_64__) || \
+ defined(__powerpc__) || defined(__powerpc64__) || \
+ (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
+ defined(__aarch64__))
/* These architectures are able to do unaligned memory accesses and can
   handle them fast.
 */
@@ -44,6 +47,25 @@
#endif
+#ifdef BUFHELP_FAST_UNALIGNED_ACCESS
+/* Define type with one-byte alignment on architectures with fast unaligned
+ memory accesses.
+ */
+typedef struct bufhelp_int_s
+{
+ uintptr_t a;
+} __attribute__((packed, aligned(1))) bufhelp_int_t;
+#else
+/* Define type with default alignment for other architectures (unaligned
+   accesses handled in per-byte loops).
+ */
+typedef struct bufhelp_int_s
+{
+ uintptr_t a;
+} bufhelp_int_t;
+#endif
+
+
/* Optimized function for small buffer copying */
static inline void
buf_cpy(void *_dst, const void *_src, size_t len)
@@ -54,21 +76,21 @@ buf_cpy(void *_dst, const void *_src, size_t len)
#else
byte *dst = _dst;
const byte *src = _src;
- uintptr_t *ldst;
- const uintptr_t *lsrc;
+ bufhelp_int_t *ldst;
+ const bufhelp_int_t *lsrc;
#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
- const unsigned int longmask = sizeof(uintptr_t) - 1;
+ const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
/* Skip fast processing if buffers are unaligned. */
if (((uintptr_t)dst | (uintptr_t)src) & longmask)
goto do_bytes;
#endif
- ldst = (uintptr_t *)(void *)dst;
- lsrc = (const uintptr_t *)(const void *)src;
+ ldst = (bufhelp_int_t *)(void *)dst;
+ lsrc = (const bufhelp_int_t *)(const void *)src;
- for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
- *ldst++ = *lsrc++;
+ for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+ (ldst++)->a = (lsrc++)->a;
dst = (byte *)ldst;
src = (const byte *)lsrc;
@@ -90,22 +112,22 @@ buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len)
byte *dst = _dst;
const byte *src1 = _src1;
const byte *src2 = _src2;
- uintptr_t *ldst;
- const uintptr_t *lsrc1, *lsrc2;
+ bufhelp_int_t *ldst;
+ const bufhelp_int_t *lsrc1, *lsrc2;
#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
- const unsigned int longmask = sizeof(uintptr_t) - 1;
+ const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
/* Skip fast processing if buffers are unaligned. */
if (((uintptr_t)dst | (uintptr_t)src1 | (uintptr_t)src2) & longmask)
goto do_bytes;
#endif
- ldst = (uintptr_t *)(void *)dst;
- lsrc1 = (const uintptr_t *)(const void *)src1;
- lsrc2 = (const uintptr_t *)(const void *)src2;
+ ldst = (bufhelp_int_t *)(void *)dst;
+ lsrc1 = (const bufhelp_int_t *)(const void *)src1;
+ lsrc2 = (const bufhelp_int_t *)(const void *)src2;
- for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
- *ldst++ = *lsrc1++ ^ *lsrc2++;
+ for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+ (ldst++)->a = (lsrc1++)->a ^ (lsrc2++)->a;
dst = (byte *)ldst;
src1 = (const byte *)lsrc1;
@@ -126,21 +148,21 @@ buf_xor_1(void *_dst, const void *_src, size_t len)
{
byte *dst = _dst;
const byte *src = _src;
- uintptr_t *ldst;
- const uintptr_t *lsrc;
+ bufhelp_int_t *ldst;
+ const bufhelp_int_t *lsrc;
#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
- const unsigned int longmask = sizeof(uintptr_t) - 1;
+ const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
/* Skip fast processing if buffers are unaligned. */
if (((uintptr_t)dst | (uintptr_t)src) & longmask)
goto do_bytes;
#endif
- ldst = (uintptr_t *)(void *)dst;
- lsrc = (const uintptr_t *)(const void *)src;
+ ldst = (bufhelp_int_t *)(void *)dst;
+ lsrc = (const bufhelp_int_t *)(const void *)src;
- for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
- *ldst++ ^= *lsrc++;
+ for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+ (ldst++)->a ^= (lsrc++)->a;
dst = (byte *)ldst;
src = (const byte *)lsrc;
@@ -162,22 +184,22 @@ buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len)
byte *dst1 = _dst1;
byte *dst2 = _dst2;
const byte *src = _src;
- uintptr_t *ldst1, *ldst2;
- const uintptr_t *lsrc;
+ bufhelp_int_t *ldst1, *ldst2;
+ const bufhelp_int_t *lsrc;
#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
- const unsigned int longmask = sizeof(uintptr_t) - 1;
+ const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
/* Skip fast processing if buffers are unaligned. */
if (((uintptr_t)src | (uintptr_t)dst1 | (uintptr_t)dst2) & longmask)
goto do_bytes;
#endif
- ldst1 = (uintptr_t *)(void *)dst1;
- ldst2 = (uintptr_t *)(void *)dst2;
- lsrc = (const uintptr_t *)(const void *)src;
+ ldst1 = (bufhelp_int_t *)(void *)dst1;
+ ldst2 = (bufhelp_int_t *)(void *)dst2;
+ lsrc = (const bufhelp_int_t *)(const void *)src;
- for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
- *ldst1++ = (*ldst2++ ^= *lsrc++);
+ for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+ (ldst1++)->a = ((ldst2++)->a ^= (lsrc++)->a);
dst1 = (byte *)ldst1;
dst2 = (byte *)ldst2;
@@ -203,11 +225,11 @@ buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
const byte *src_xor = _src_xor;
const byte *src_cpy = _src_cpy;
byte temp;
- uintptr_t *ldst_xor, *lsrcdst_cpy;
- const uintptr_t *lsrc_cpy, *lsrc_xor;
+ bufhelp_int_t *ldst_xor, *lsrcdst_cpy;
+ const bufhelp_int_t *lsrc_cpy, *lsrc_xor;
uintptr_t ltemp;
#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
- const unsigned int longmask = sizeof(uintptr_t) - 1;
+ const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
/* Skip fast processing if buffers are unaligned. */
if (((uintptr_t)src_cpy | (uintptr_t)src_xor | (uintptr_t)dst_xor |
@@ -215,16 +237,16 @@ buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
goto do_bytes;
#endif
- ldst_xor = (uintptr_t *)(void *)dst_xor;
- lsrc_xor = (const uintptr_t *)(void *)src_xor;
- lsrcdst_cpy = (uintptr_t *)(void *)srcdst_cpy;
- lsrc_cpy = (const uintptr_t *)(const void *)src_cpy;
+ ldst_xor = (bufhelp_int_t *)(void *)dst_xor;
+ lsrc_xor = (const bufhelp_int_t *)(void *)src_xor;
+ lsrcdst_cpy = (bufhelp_int_t *)(void *)srcdst_cpy;
+ lsrc_cpy = (const bufhelp_int_t *)(const void *)src_cpy;
- for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
+ for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
{
- ltemp = *lsrc_cpy++;
- *ldst_xor++ = *lsrcdst_cpy ^ *lsrc_xor++;
- *lsrcdst_cpy++ = ltemp;
+ ltemp = (lsrc_cpy++)->a;
+ (ldst_xor++)->a = (lsrcdst_cpy)->a ^ (lsrc_xor++)->a;
+ (lsrcdst_cpy++)->a = ltemp;
}
dst_xor = (byte *)ldst_xor;
@@ -357,53 +379,64 @@ static inline void buf_put_le64(void *_buf, u64 val)
#else /*BUFHELP_FAST_UNALIGNED_ACCESS*/
+typedef struct bufhelp_u32_s
+{
+ u32 a;
+} __attribute__((packed, aligned(1))) bufhelp_u32_t;
+
/* Functions for loading and storing unaligned u32 values of different
endianness. */
static inline u32 buf_get_be32(const void *_buf)
{
- return be_bswap32(*(const u32 *)_buf);
+ return be_bswap32(((const bufhelp_u32_t *)_buf)->a);
}
static inline u32 buf_get_le32(const void *_buf)
{
- return le_bswap32(*(const u32 *)_buf);
+ return le_bswap32(((const bufhelp_u32_t *)_buf)->a);
}
static inline void buf_put_be32(void *_buf, u32 val)
{
- u32 *out = _buf;
- *out = be_bswap32(val);
+ bufhelp_u32_t *out = _buf;
+ out->a = be_bswap32(val);
}
static inline void buf_put_le32(void *_buf, u32 val)
{
- u32 *out = _buf;
- *out = le_bswap32(val);
+ bufhelp_u32_t *out = _buf;
+ out->a = le_bswap32(val);
}
#ifdef HAVE_U64_TYPEDEF
+
+typedef struct bufhelp_u64_s
+{
+ u64 a;
+} __attribute__((packed, aligned(1))) bufhelp_u64_t;
+
/* Functions for loading and storing unaligned u64 values of different
endianness. */
static inline u64 buf_get_be64(const void *_buf)
{
- return be_bswap64(*(const u64 *)_buf);
+ return be_bswap64(((const bufhelp_u64_t *)_buf)->a);
}
static inline u64 buf_get_le64(const void *_buf)
{
- return le_bswap64(*(const u64 *)_buf);
+ return le_bswap64(((const bufhelp_u64_t *)_buf)->a);
}
static inline void buf_put_be64(void *_buf, u64 val)
{
- u64 *out = _buf;
- *out = be_bswap64(val);
+ bufhelp_u64_t *out = _buf;
+ out->a = be_bswap64(val);
}
static inline void buf_put_le64(void *_buf, u64 val)
{
- u64 *out = _buf;
- *out = le_bswap64(val);
+ bufhelp_u64_t *out = _buf;
+ out->a = le_bswap64(val);
}
#endif /*HAVE_U64_TYPEDEF*/
diff --git a/configure.ac b/configure.ac
index 4bbd686..16f6a21 100644
--- a/configure.ac
+++ b/configure.ac
@@ -958,6 +958,24 @@ fi
#
+# Check whether the compiler supports the GCC style packed attribute
+#
+AC_CACHE_CHECK([whether the GCC style packed attribute is supported],
+ [gcry_cv_gcc_attribute_packed],
+ [gcry_cv_gcc_attribute_packed=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[struct foo_s { char a; long b; } __attribute__ ((packed));
+ enum bar {
+ FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + sizeof(long))),
+ };]])],
+ [gcry_cv_gcc_attribute_packed=yes])])
+if test "$gcry_cv_gcc_attribute_packed" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_ATTRIBUTE_PACKED,1,
+ [Defined if a GCC style "__attribute__ ((packed))" is supported])
+fi
+
+
+#
# Check whether the compiler supports 'asm' or '__asm__' keyword for
# assembler blocks.
#
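For reference, the probe above relies on a constant-expression trick:
the enum initializer divides by the result of a sizeof comparison, so
the test program only compiles when __attribute__ ((packed)) actually
collapses the struct to sizeof(char) + sizeof(long); otherwise the
divisor is zero, compilation fails, and configure records "no". A
stand-alone sketch (struct foo_s and FOO come from the probe itself;
the main function is added only to make it a complete program):

struct foo_s { char a; long b; } __attribute__ ((packed));

enum bar {
  /* Divisor is 1 if packed shrank the struct, 0 (a compile-time
     error) if it did not. */
  FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + sizeof(long))),
};

int main (void) { return FOO - 1; }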