[PATCH 3/7] sm3: add Intel SM3 extension implementation
Jussi Kivilinna
jussi.kivilinna at iki.fi
Sun Jun 28 14:37:37 CEST 2026
* LICENSES: Add 'cipher/sm3-intel-avx2-amd64.S'.
* cipher/Makefile.am: Add 'sm3-intel-avx2-amd64.S'.
* cipher/sm3-intel-avx2-amd64.S: New.
* cipher/sm3.c (USE_INTEL_SM3): New.
(ASM_FUNC_ABI, ASM_EXTRA_STACK): Define also for USE_INTEL_SM3.
[USE_INTEL_SM3] (_gcry_sm3_transform_intel_avx2)
(do_sm3_transform_intel_avx2): New.
(sm3_init) [USE_INTEL_SM3]: Use Intel SM3 accelerated implementation if
HW feature available.
* configure.ac (gcry_cv_gcc_inline_asm_sm3)
(HAVE_GCC_INLINE_ASM_SM3): New.
(GCRYPT_ASM_DIGESTS) [x86_64]: Add 'sm3-intel-avx2-amd64.lo'.
* doc/gcrypt.texi: Add "intel-sm3" to HW features list.
* src/g10lib.h (HWF_INTEL_SM3): New.
* src/hwf-x86.c (detect_x86_gnuc): Add Intel SM3 detection.
* src/hwfeatures.c (hwflist): Add "intel-sm3".
--
Converted to GAS assembly from the SM3-NI implementation in intel-ipsec-mb.
Uses the Intel SM3 instructions (VSM3MSG1, VSM3MSG2 and VSM3RNDS2).
Tested with Intel SDE (both 'sde -future' instruction mix and SM3 test
vectors), not yet on real hardware.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
LICENSES | 30 +++++
cipher/Makefile.am | 3 +-
cipher/sm3-intel-avx2-amd64.S | 228 ++++++++++++++++++++++++++++++++++
cipher/sm3.c | 32 ++++-
configure.ac | 25 ++++
doc/gcrypt.texi | 1 +
src/g10lib.h | 1 +
src/hwf-x86.c | 4 +
src/hwfeatures.c | 1 +
9 files changed, 323 insertions(+), 2 deletions(-)
create mode 100644 cipher/sm3-intel-avx2-amd64.S
diff --git a/LICENSES b/LICENSES
index c2fea82d..e61dca9a 100644
--- a/LICENSES
+++ b/LICENSES
@@ -86,6 +86,36 @@ with any binary distributions derived from the GNU C Library.
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#+end_quote
+ For files:
+ - cipher/sm3-intel-avx2-amd64.S
+
+#+begin_quote
+ Copyright (c) 2023-2024, Intel Corporation
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#+end_quote
+
For files:
- random/jitterentropy-base.c
- random/jitterentropy-gcd.c
diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 0abbfea6..b18ccfd9 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -152,7 +152,8 @@ EXTRA_libcipher_la_SOURCES = \
sha512-intel-shaext.c \
sha512-armv7-neon.S sha512-armv8-aarch64-ce.S sha512-arm.S \
sha512-ppc.c sha512-riscv-zvknhb-zvkb.c sha512-ssse3-i386.c \
- sm3.c sm3-avx-bmi2-amd64.S sm3-aarch64.S sm3-armv8-aarch64-ce.S \
+ sm3.c sm3-avx-bmi2-amd64.S sm3-intel-avx2-amd64.S sm3-aarch64.S \
+ sm3-armv8-aarch64-ce.S \
keccak.c keccak_permute_32.h keccak_permute_64.h \
keccak-armv7-neon.S keccak-amd64-avx512.S \
stribog.c \
diff --git a/cipher/sm3-intel-avx2-amd64.S b/cipher/sm3-intel-avx2-amd64.S
new file mode 100644
index 00000000..62436e93
--- /dev/null
+++ b/cipher/sm3-intel-avx2-amd64.S
@@ -0,0 +1,228 @@
+/*
+;;
+;; Copyright (c) 2023-2024, Intel Corporation
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions are met:
+;;
+;; * Redistributions of source code must retain the above copyright notice,
+;; this list of conditions and the following disclaimer.
+;; * Redistributions in binary form must reproduce the above copyright
+;; notice, this list of conditions and the following disclaimer in the
+;; documentation and/or other materials provided with the distribution.
+;; * Neither the name of Intel Corporation nor the names of its contributors
+;; may be used to endorse or promote products derived from this software
+;; without specific prior written permission.
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;
+*/
+/*
+ * From:
+ * https://github.com/intel/intel-ipsec-mb/blob/2b8d6041780fe94b749c98c08b92e7ea06cf50e2/lib/avx2_t4/sm3_ni_x1_avx2.asm
+ *
+ * Conversion to GAS assembly and integration to libgcrypt
+ * by Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * Uses the Intel SM3 instruction set extension (VSM3MSG1/VSM3MSG2/VSM3RNDS2)
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+ defined(HAVE_GCC_INLINE_ASM_SM3) && defined(USE_SM3)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+#define arg_hash rdi
+#define arg_msg rsi
+#define arg_num_blks rdx
+
+SECTION_RODATA
+
+ELF(.type _gcry_sm3_intel_avx2_consts,@object)
+_gcry_sm3_intel_avx2_consts:
+
+.align 16
+.LSHUFF_MASK:
+ .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+ELF(.size _gcry_sm3_intel_avx2_consts,.-_gcry_sm3_intel_avx2_consts)
+
+.text
+
+/* Compute next 4 schedule words into W19_16; T1 and T2 are clobbered scratch. */
+#define SM3MSG(W03_00, W07_04, W11_08, W15_12, W19_16, T1, T2) \
+ vpalignr W19_16, W11_08, W07_04, 3*4; /* W19_16 = W10 W9 W8 W7 */ \
+ vpsrldq T1, W15_12, 4; /* T1 = 0 W15 W14 W13 */ \
+ vsm3msg1 W19_16, T1, W03_00; /* W19_16 = WTMP3..0 (intermediate) */ \
+ vpalignr T1, W07_04, W03_00, 3*4; /* T1 = W6 W5 W4 W3 */ \
+ vpalignr T2, W15_12, W11_08, 2*4; /* T2 = W13 W12 W11 W10 */ \
+ vsm3msg2 W19_16, T1, T2; /* W19_16 = W19 W18 W17 W16 */
+
+/* Perform 4 rounds of SM3 with round immediate R, reading W03_00 and W07_04
+ * and updating the state registers ABEF and CDGH.  T1 is clobbered. */
+#define SM3ROUNDS4(ABEF, CDGH, W03_00, W07_04, T1, R) \
+ vpunpcklqdq T1, W03_00, W07_04; /* T1 = W5 W4 W1 W0 */ \
+ vsm3rnds2 CDGH, ABEF, T1, R; /* CDGH = updated ABEF */ \
+ vpunpckhqdq T1, W03_00, W07_04; /* T1 = W7 W6 W3 W2 */ \
+ vsm3rnds2 ABEF, CDGH, T1, ((R) + 2); /* ABEF = updated CDGH */
+
+/*
+ * unsigned int
+ * _gcry_sm3_transform_intel_avx2 (void *state, const unsigned char *data,
+ * size_t nblks)
+ */
+.align 16
+.globl _gcry_sm3_transform_intel_avx2
+ELF(.type _gcry_sm3_transform_intel_avx2,@function)
+_gcry_sm3_transform_intel_avx2:
+ /* input:
+ * %rdi: state (8 x u32 hash value, updated in place)
+ * %rsi: data (message blocks, 64 bytes each)
+ * %rdx: nblks (may be zero); returns 0 in %eax
+ */
+ CFI_STARTPROC();
+
+ test arg_num_blks, arg_num_blks;
+ jz .Ldone; /* no blocks: leave the state untouched */
+
+ /* Load current hash value and change word order. */
+ vmovdqu xmm6, [arg_hash + 0*16]; /* xmm6 = D C B A */
+ vmovdqu xmm7, [arg_hash + 1*16]; /* xmm7 = H G F E */
+ vpshufd xmm0, xmm6, 0x1B; /* xmm0 = A B C D */
+ vpshufd xmm1, xmm7, 0x1B; /* xmm1 = E F G H */
+ vpunpckhqdq xmm6, xmm1, xmm0; /* xmm6 = A B E F */
+ vpunpcklqdq xmm7, xmm1, xmm0; /* xmm7 = C D G H */
+
+ /* Pre-rotate C,D,G,H (undone again after the block loop). */
+ vpsrld xmm2, xmm7, 9;
+ vpslld xmm3, xmm7, 23;
+ vpxor xmm1, xmm2, xmm3; /* xmm1 = ROL32(CDGH, 23) */
+ vpsrld xmm4, xmm7, 19;
+ vpslld xmm5, xmm7, 13;
+ vpxor xmm0, xmm4, xmm5; /* xmm0 = ROL32(CDGH, 13) */
+ vpblendd xmm7, xmm1, xmm0, 0x3; /* xmm7 = ROL(C,23) ROL(D,23)
+ ROL(G,13) ROL(H,13) */
+
+ vmovdqa xmm12, [.LSHUFF_MASK ADD_RIP]; /* per-dword byte-swap mask */
+
+.align 16
+.Lblock_loop:
+ vmovdqa xmm10, xmm6; /* keep input ABEF for the feed-forward XOR */
+ vmovdqa xmm11, xmm7; /* keep input CDGH for the feed-forward XOR */
+
+ /* Prepare W[0..15] - read and byte-swap the message words. */
+ vmovdqu xmm2, [arg_msg + 0*16];
+ vmovdqu xmm3, [arg_msg + 1*16];
+ vmovdqu xmm4, [arg_msg + 2*16];
+ vmovdqu xmm5, [arg_msg + 3*16];
+ vpshufb xmm2, xmm2, xmm12; /* xmm2 = W03 W02 W01 W00 */
+ vpshufb xmm3, xmm3, xmm12; /* xmm3 = W07 W06 W05 W04 */
+ vpshufb xmm4, xmm4, xmm12; /* xmm4 = W11 W10 W09 W08 */
+ vpshufb xmm5, xmm5, xmm12; /* xmm5 = W15 W14 W13 W12 */
+
+ SM3MSG(xmm2, xmm3, xmm4, xmm5, xmm8, xmm9, xmm1) /* W19..16 */
+ SM3ROUNDS4(xmm6, xmm7, xmm2, xmm3, xmm1, 0)
+
+ vmovdqa xmm2, xmm8;
+ SM3MSG(xmm3, xmm4, xmm5, xmm2, xmm8, xmm9, xmm1) /* W23..20 */
+ SM3ROUNDS4(xmm6, xmm7, xmm3, xmm4, xmm1, 4)
+
+ vmovdqa xmm3, xmm8;
+ SM3MSG(xmm4, xmm5, xmm2, xmm3, xmm8, xmm9, xmm1) /* W27..24 */
+ SM3ROUNDS4(xmm6, xmm7, xmm4, xmm5, xmm1, 8)
+
+ vmovdqa xmm4, xmm8;
+ SM3MSG(xmm5, xmm2, xmm3, xmm4, xmm8, xmm9, xmm1) /* W31..28 */
+ SM3ROUNDS4(xmm6, xmm7, xmm5, xmm2, xmm1, 12)
+
+ vmovdqa xmm5, xmm8;
+ SM3MSG(xmm2, xmm3, xmm4, xmm5, xmm8, xmm9, xmm1) /* W35..32 */
+ SM3ROUNDS4(xmm6, xmm7, xmm2, xmm3, xmm1, 16)
+
+ vmovdqa xmm2, xmm8;
+ SM3MSG(xmm3, xmm4, xmm5, xmm2, xmm8, xmm9, xmm1) /* W39..36 */
+ SM3ROUNDS4(xmm6, xmm7, xmm3, xmm4, xmm1, 20)
+
+ vmovdqa xmm3, xmm8;
+ SM3MSG(xmm4, xmm5, xmm2, xmm3, xmm8, xmm9, xmm1) /* W43..40 */
+ SM3ROUNDS4(xmm6, xmm7, xmm4, xmm5, xmm1, 24)
+
+ vmovdqa xmm4, xmm8;
+ SM3MSG(xmm5, xmm2, xmm3, xmm4, xmm8, xmm9, xmm1) /* W47..44 */
+ SM3ROUNDS4(xmm6, xmm7, xmm5, xmm2, xmm1, 28)
+
+ vmovdqa xmm5, xmm8;
+ SM3MSG(xmm2, xmm3, xmm4, xmm5, xmm8, xmm9, xmm1) /* W51..48 */
+ SM3ROUNDS4(xmm6, xmm7, xmm2, xmm3, xmm1, 32)
+
+ vmovdqa xmm2, xmm8;
+ SM3MSG(xmm3, xmm4, xmm5, xmm2, xmm8, xmm9, xmm1) /* W55..52 */
+ SM3ROUNDS4(xmm6, xmm7, xmm3, xmm4, xmm1, 36)
+
+ vmovdqa xmm3, xmm8;
+ SM3MSG(xmm4, xmm5, xmm2, xmm3, xmm8, xmm9, xmm1) /* W59..56 */
+ SM3ROUNDS4(xmm6, xmm7, xmm4, xmm5, xmm1, 40)
+
+ vmovdqa xmm4, xmm8;
+ SM3MSG(xmm5, xmm2, xmm3, xmm4, xmm8, xmm9, xmm1) /* W63..60 */
+ SM3ROUNDS4(xmm6, xmm7, xmm5, xmm2, xmm1, 44)
+
+ vmovdqa xmm5, xmm8;
+ SM3MSG(xmm2, xmm3, xmm4, xmm5, xmm8, xmm9, xmm1) /* W67..64 */
+ SM3ROUNDS4(xmm6, xmm7, xmm2, xmm3, xmm1, 48)
+
+ vmovdqa xmm2, xmm8; /* xmm2 = W67..64, last words needed */
+ SM3ROUNDS4(xmm6, xmm7, xmm3, xmm4, xmm1, 52)
+
+ SM3ROUNDS4(xmm6, xmm7, xmm4, xmm5, xmm1, 56)
+
+ SM3ROUNDS4(xmm6, xmm7, xmm5, xmm2, xmm1, 60)
+
+ /* XOR the saved input state back in (feed-forward); go to next block. */
+ vpxor xmm6, xmm6, xmm10;
+ vpxor xmm7, xmm7, xmm11;
+ add arg_msg, 64;
+ dec arg_num_blks;
+ jnz .Lblock_loop;
+
+ /* Un-rotate C,D,G,H. */
+ vpslld xmm2, xmm7, 9;
+ vpsrld xmm3, xmm7, 23;
+ vpxor xmm1, xmm2, xmm3; /* xmm1 = ROL32(CDGH, 9) */
+ vpslld xmm4, xmm7, 19;
+ vpsrld xmm5, xmm7, 13;
+ vpxor xmm0, xmm4, xmm5; /* xmm0 = ROL32(CDGH, 19) */
+ vpblendd xmm7, xmm1, xmm0, 0x3; /* xmm7 = ROL(C,9) ROL(D,9)
+ ROL(G,19) ROL(H,19) */
+ vpshufd xmm0, xmm6, 0x1B; /* xmm0 = F E B A */
+ vpshufd xmm1, xmm7, 0x1B; /* xmm1 = H G D C */
+ vpunpcklqdq xmm6, xmm0, xmm1; /* xmm6 = D C B A */
+ vpunpckhqdq xmm7, xmm0, xmm1; /* xmm7 = H G F E */
+ vmovdqu [arg_hash + 0*16], xmm6;
+ vmovdqu [arg_hash + 1*16], xmm7;
+
+ vzeroall; /* clear all vector registers before returning */
+
+.Ldone:
+ xor eax, eax; /* return 0 */
+ ret_spec_stop;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm3_transform_intel_avx2,.-_gcry_sm3_transform_intel_avx2;)
+
+#endif
+#endif
diff --git a/cipher/sm3.c b/cipher/sm3.c
index bfe9f4c2..6001afff 100644
--- a/cipher/sm3.c
+++ b/cipher/sm3.c
@@ -56,6 +56,16 @@
# define USE_AVX_BMI2 1
#endif
+/* USE_INTEL_SM3 indicates whether to compile with Intel SM3 extension code. */
+#undef USE_INTEL_SM3
+#if defined(__x86_64__) && \
+ defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_SM3) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_INTEL_SM3 1
+#endif
+
/* USE_AARCH64_SIMD indicates whether to enable ARMv8 SIMD assembly
* code. */
#undef USE_AARCH64_SIMD
@@ -88,7 +98,7 @@ typedef struct {
* stack to store XMM6-XMM15 needed on Win64. */
#undef ASM_FUNC_ABI
#undef ASM_EXTRA_STACK
-#if defined(USE_AVX_BMI2)
+#if defined(USE_AVX_BMI2) || defined(USE_INTEL_SM3)
# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
# define ASM_FUNC_ABI __attribute__((sysv_abi))
# define ASM_EXTRA_STACK (10 * 16 + 4 * sizeof(void *))
@@ -115,6 +125,22 @@ do_sm3_transform_amd64_avx_bmi2(void *context, const unsigned char *data,
}
#endif /* USE_AVX_BMI2 */
+#ifdef USE_INTEL_SM3
+unsigned int _gcry_sm3_transform_intel_avx2(void *state,
+ const void *input_data,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sm3_transform_intel_avx2(void *context, const unsigned char *data,
+                            size_t nblks)
+{
+  SM3_CONTEXT *ctx = context;
+  unsigned int depth = _gcry_sm3_transform_intel_avx2 (ctx->h, data, nblks);
+  /* A nonzero burn depth is extended by ASM_EXTRA_STACK; zero stays zero. */
+  return depth ? depth + ASM_EXTRA_STACK : 0;
+}
+#endif /* USE_INTEL_SM3 */
+
#ifdef USE_AARCH64_SIMD
unsigned int _gcry_sm3_transform_aarch64(void *state, const void *input_data,
size_t num_blks);
@@ -173,6 +199,10 @@ sm3_init (void *context, unsigned int flags)
if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
hd->bctx.bwrite = do_sm3_transform_amd64_avx_bmi2;
#endif
+#ifdef USE_INTEL_SM3
+ if ((features & HWF_INTEL_SM3) && (features & HWF_INTEL_AVX2))
+ hd->bctx.bwrite = do_sm3_transform_intel_avx2;
+#endif
#ifdef USE_AARCH64_SIMD
if (features & HWF_ARM_NEON)
hd->bctx.bwrite = do_sm3_transform_aarch64;
diff --git a/configure.ac b/configure.ac
index c11bc3b6..b174e518 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1612,6 +1612,30 @@ if test "$gcry_cv_gcc_inline_asm_sha512" = "yes" ; then
fi
+#
+# Check whether GCC inline assembler supports Intel SM3 instructions.
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports Intel SM3 instructions],
+ [gcry_cv_gcc_inline_asm_sm3],
+ [if test "$mpi_cpu_arch" != "x86" ||
+ test "$try_asm_modules" != "yes" ; then
+ gcry_cv_gcc_inline_asm_sm3="n/a"
+ else
+ gcry_cv_gcc_inline_asm_sm3=no
+ AC_LINK_IFELSE([AC_LANG_PROGRAM(
+ [[void a(void) {
+ __asm__("vsm3msg1 %%xmm2, %%xmm1, %%xmm3\n\t":::"cc");
+ __asm__("vsm3msg2 %%xmm2, %%xmm1, %%xmm3\n\t":::"cc");
+ __asm__("vsm3rnds2 \$0, %%xmm2, %%xmm1, %%xmm3\n\t":::"cc");
+ }]], [ a(); ] )],
+ [gcry_cv_gcc_inline_asm_sm3=yes])
+ fi])
+if test "$gcry_cv_gcc_inline_asm_sm3" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_INLINE_ASM_SM3,1,
+ [Defined if inline assembler supports Intel SM3 instructions])
+fi
+
+
#
# Check whether GCC inline assembler supports SSE4.1 instructions.
#
@@ -4142,6 +4166,7 @@ if test "$found" = "1" ; then
x86_64-*-*)
# Build with the assembly implementation
GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-avx-bmi2-amd64.lo"
+ GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-intel-avx2-amd64.lo"
;;
aarch64-*-*)
# Build with the assembly implementation
diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi
index 58887b8d..b233cd78 100644
--- a/doc/gcrypt.texi
+++ b/doc/gcrypt.texi
@@ -587,6 +587,7 @@ are
@item intel-avx512
@item intel-gfni
@item intel-sha512
+@item intel-sm3
@item arm-neon
@item arm-aes
@item arm-sha1
diff --git a/src/g10lib.h b/src/g10lib.h
index 6abc5f5b..d0e64a69 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -240,6 +240,7 @@ char **_gcry_strtokenize (const char *string, const char *delim);
#define HWF_INTEL_AVX512 (1 << 17)
#define HWF_INTEL_GFNI (1 << 18)
#define HWF_INTEL_SHA512 (1 << 19)
+#define HWF_INTEL_SM3 (1 << 20)
#elif defined(HAVE_CPU_ARCH_ARM)
diff --git a/src/hwf-x86.c b/src/hwf-x86.c
index e3e144d0..f8c3c948 100644
--- a/src/hwf-x86.c
+++ b/src/hwf-x86.c
@@ -425,6 +425,10 @@ detect_x86_gnuc (
/* Test bit 0 for Intel SHA512 instructions. */
if ((intel_feat3 & (1 << 0)) && os_supports_avx_avx2_registers)
result |= HWF_INTEL_SHA512;
+
+ /* Test bit 1 for Intel SM3 instructions. */
+ if ((intel_feat3 & (1 << 1)) && os_supports_avx_avx2_registers)
+ result |= HWF_INTEL_SM3;
}
/* Check additional feature flags. */
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index 4c13ce96..4f9053af 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -85,6 +85,7 @@ static struct
{ HWF_INTEL_AVX512, "intel-avx512" },
{ HWF_INTEL_GFNI, "intel-gfni" },
{ HWF_INTEL_SHA512, "intel-sha512" },
+ { HWF_INTEL_SM3, "intel-sm3" },
/* Following removed HW feature strings are kept for API compatibility. */
{ 0, "intel-fast-vpgather" },
#elif defined(HAVE_CPU_ARCH_ARM)
--
2.53.0
More information about the Gcrypt-devel
mailing list