From glin at suse.com Tue Sep 2 09:14:27 2025 From: glin at suse.com (Gary Lin) Date: Tue, 2 Sep 2025 15:14:27 +0800 Subject: [PATCH] kdf: Avoid redundant memcpy() Message-ID: <20250902071427.1781-1-glin@suse.com> * cipher/kdf.c (fill_block): Xor 'ref_block' and 'prev_block' directly to avoid a redundant memcpy(). Signed-off-by: Gary Lin --- cipher/kdf.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cipher/kdf.c b/cipher/kdf.c index 71156ea4..16cec4e4 100644 --- a/cipher/kdf.c +++ b/cipher/kdf.c @@ -588,9 +588,14 @@ fill_block (const u64 *prev_block, const u64 *ref_block, u64 *curr_block, u64 block_tmp[ARGON2_WORDS_IN_BLOCK]; int i; - memcpy (block_r, ref_block, 1024); if (prev_block) - xor_block (block_r, prev_block); + { + for (i = 0; i < ARGON2_WORDS_IN_BLOCK; i++) + block_r[i] = ref_block[i] ^ prev_block[i]; + } + else + memcpy (block_r, ref_block, 1024); + memcpy (block_tmp, block_r, 1024); if (with_xor) -- 2.51.0 From jussi.kivilinna at iki.fi Wed Sep 17 17:38:02 2025 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Wed, 17 Sep 2025 18:38:02 +0300 Subject: [PATCH] hwfeature: fix "riscv-zvkg" HW feature mapping Message-ID: <20250917153802.315674-1-jussi.kivilinna@iki.fi> * src/hwfeatures.c (hwflist) [HAVE_CPU_ARCH_RISCV]: Fix mapping from ZVKNED to ZVKG for "riscv-zvkg". 
-- Signed-off-by: Jussi Kivilinna --- src/hwfeatures.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hwfeatures.c b/src/hwfeatures.c index 06709da7..44453ef1 100644 --- a/src/hwfeatures.c +++ b/src/hwfeatures.c @@ -98,7 +98,7 @@ static struct { HWF_RISCV_ZBB, "riscv-zbb" }, { HWF_RISCV_ZBC, "riscv-zbc" }, { HWF_RISCV_ZVKB, "riscv-zvkb" }, - { HWF_RISCV_ZVKNED, "riscv-zvkg" }, + { HWF_RISCV_ZVKG, "riscv-zvkg" }, { HWF_RISCV_ZVKNED, "riscv-zvkned" }, { HWF_RISCV_ZVKNHA, "riscv-zvknha" }, { HWF_RISCV_ZVKNHB, "riscv-zvknhb" }, -- 2.48.1 From jussi.kivilinna at iki.fi Wed Sep 17 18:02:41 2025 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Wed, 17 Sep 2025 19:02:41 +0300 Subject: [PATCH] t-kem: fix test loop iteration Message-ID: <20250917160241.400420-1-jussi.kivilinna@iki.fi> * tests/t-kem.c (check_kem): Reset loop iterator for each algo test loop. -- Signed-off-by: Jussi Kivilinna --- tests/t-kem.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/t-kem.c b/tests/t-kem.c index cafb68b1..dee879a0 100644 --- a/tests/t-kem.c +++ b/tests/t-kem.c @@ -483,17 +483,17 @@ check_kem (int n_loops) info ("Checking KEM.\n"); ntests = 0; - testno = 0; + if ((selected_algo & SELECTED_ALGO_SNTRUP761)) { - for (; testno < n_loops; testno++) + for (testno = 0; testno < n_loops; testno++) test_kem_sntrup761 (testno); ntests += n_loops; } if ((selected_algo & SELECTED_ALGO_CM6688128F)) { - for (; testno < n_loops; testno++) + for (testno = 0; testno < n_loops; testno++) test_kem_mceliece6688128f (testno); ntests += n_loops; } @@ -501,21 +501,21 @@ check_kem (int n_loops) #ifdef USE_KYBER if ((selected_algo & SELECTED_ALGO_MLKEM512)) { - for (; testno < ntests + n_loops; testno++) + for (testno = 0; testno < ntests + n_loops; testno++) test_kem_mlkem512 (testno); ntests += n_loops; } if ((selected_algo & SELECTED_ALGO_MLKEM768)) { - for (; testno < ntests + n_loops; testno++) + for (testno = 0; testno < ntests + 
n_loops; testno++) test_kem_mlkem768 (testno); ntests += n_loops; } if ((selected_algo & SELECTED_ALGO_MLKEM1024)) { - for (; testno < ntests + n_loops; testno++) + for (testno = 0; testno < ntests + n_loops; testno++) test_kem_mlkem1024 (testno); ntests += n_loops; } @@ -523,14 +523,14 @@ check_kem (int n_loops) if ((selected_algo & SELECTED_ALGO_RAW_X25519)) { - for (; testno < ntests + n_loops; testno++) + for (testno = 0; testno < ntests + n_loops; testno++) test_kem_raw_x25519 (testno); ntests += n_loops; } if ((selected_algo & SELECTED_ALGO_DHKEM25519)) { - for (; testno < ntests + n_loops; testno++) + for (testno = 0; testno < ntests + n_loops; testno++) test_kem_dhkem_x25519 (testno); ntests += n_loops; } -- 2.48.1 From jussi.kivilinna at iki.fi Wed Sep 17 19:37:11 2025 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Wed, 17 Sep 2025 20:37:11 +0300 Subject: [PATCH] hwf-x86: remove unused HWF_INTEL_FAST_VPGATHER Message-ID: <20250917173711.799675-1-jussi.kivilinna@iki.fi> * cipher/twofish-avx2-amd64.S (RNOT): Update comment on usage of this register macro. * doc/gcrypt.texi: Remove 'intel-fast-vpgather'. * src/g10lib.h (HWF_INTEL_FAST_VPGATHER): Remove. (HWF_INTEL_RDTSC, HWF_INTEL_SHAEXT, HWF_INTEL_VAES_VPCLMUL, HWF_INTEL_AVX512) (HWF_INTEL_GFNI): Shift bit position to close the gap left by FAST_VPGATHER removal. * src/hwf-x86.c (detect_x86_gnuc): Remove HWF_INTEL_FAST_VPGATHER detection. * src/hwfeatures.c (hwflist): Remove 'intel-fast-vpgather'. 
-- Signed-off-by: Jussi Kivilinna --- cipher/twofish-avx2-amd64.S | 2 +- doc/gcrypt.texi | 1 - src/g10lib.h | 11 +++--- src/hwf-x86.c | 68 +------------------------------------ src/hwfeatures.c | 1 - 5 files changed, 7 insertions(+), 76 deletions(-) diff --git a/cipher/twofish-avx2-amd64.S b/cipher/twofish-avx2-amd64.S index 3f61f87b..67c1e52e 100644 --- a/cipher/twofish-avx2-amd64.S +++ b/cipher/twofish-avx2-amd64.S @@ -89,7 +89,7 @@ #define RTMP4 RT1 #define RTMP4x RT1x -/* vpgatherdd mask and '-1' */ +/* all bits set mask and '-1' */ #define RNOT %ymm6 #define RNOTx %xmm6 diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi index 9adf020f..e2b8223a 100644 --- a/doc/gcrypt.texi +++ b/doc/gcrypt.texi @@ -581,7 +581,6 @@ are @item intel-rdrand @item intel-avx @item intel-avx2 -@item intel-fast-vpgather @item intel-rdtsc @item intel-shaext @item intel-vaes-vpclmul diff --git a/src/g10lib.h b/src/g10lib.h index 68ce5405..770ae344 100644 --- a/src/g10lib.h +++ b/src/g10lib.h @@ -234,12 +234,11 @@ char **_gcry_strtokenize (const char *string, const char *delim); #define HWF_INTEL_RDRAND (1 << 11) #define HWF_INTEL_AVX (1 << 12) #define HWF_INTEL_AVX2 (1 << 13) -#define HWF_INTEL_FAST_VPGATHER (1 << 14) -#define HWF_INTEL_RDTSC (1 << 15) -#define HWF_INTEL_SHAEXT (1 << 16) -#define HWF_INTEL_VAES_VPCLMUL (1 << 17) -#define HWF_INTEL_AVX512 (1 << 18) -#define HWF_INTEL_GFNI (1 << 19) +#define HWF_INTEL_RDTSC (1 << 14) +#define HWF_INTEL_SHAEXT (1 << 15) +#define HWF_INTEL_VAES_VPCLMUL (1 << 16) +#define HWF_INTEL_AVX512 (1 << 17) +#define HWF_INTEL_GFNI (1 << 18) #elif defined(HAVE_CPU_ARCH_ARM) diff --git a/src/hwf-x86.c b/src/hwf-x86.c index d9d34c0e..54af1c83 100644 --- a/src/hwf-x86.c +++ b/src/hwf-x86.c @@ -197,7 +197,6 @@ detect_x86_gnuc (void) unsigned int max_cpuid_level; unsigned int fms, family, model; unsigned int result = 0; - unsigned int avoid_vpgather = 0; unsigned int is_amd_cpu = 0; (void)os_supports_avx_avx2_registers; @@ -255,6 +254,7 @@ detect_x86_gnuc 
(void) { /* This is an AMD CPU. */ is_amd_cpu = 1; + (void)is_amd_cpu; } /* Detect Intel features, that might also be supported by other @@ -411,61 +411,6 @@ detect_x86_gnuc (void) result |= HWF_INTEL_FAST_SHLD; break; } - - /* These Intel Core processors that have AVX2 have slow VPGATHER and - * should be avoided for table-lookup use. */ - switch (model) - { - case 0x3C: - case 0x3F: - case 0x45: - case 0x46: - /* Haswell */ - avoid_vpgather |= 1; - break; - } - - /* These Intel Core processors (skylake to tigerlake) have slow VPGATHER - * because of mitigation introduced by new microcode (2023-08-08) for - * "Downfall" speculative execution vulnerability. */ - switch (model) - { - /* Skylake, Cascade Lake, Cooper Lake */ - case 0x4E: - case 0x5E: - case 0x55: - /* Kaby Lake, Coffee Lake, Whiskey Lake, Amber Lake */ - case 0x8E: - case 0x9E: - /* Cannon Lake */ - case 0x66: - /* Comet Lake */ - case 0xA5: - case 0xA6: - /* Ice Lake */ - case 0x7E: - case 0x6A: - case 0x6C: - /* Tiger Lake */ - case 0x8C: - case 0x8D: - /* Rocket Lake */ - case 0xA7: - avoid_vpgather |= 1; - break; - } - } - else if (is_amd_cpu) - { - /* Non-AVX512 AMD CPUs (pre-Zen4) have slow VPGATHER and should be - * avoided for table-lookup use. */ - avoid_vpgather |= !(result & HWF_INTEL_AVX512); - } - else - { - /* Avoid VPGATHER for non-Intel/non-AMD CPUs as testing is needed to - * make sure it is fast enough. */ - avoid_vpgather |= 1; } #ifdef ENABLE_FORCE_SOFT_HWFEATURES @@ -483,19 +428,8 @@ detect_x86_gnuc (void) * only for those Intel processors that benefit from the SHLD * instruction. Enabled here unconditionally as requested. */ result |= HWF_INTEL_FAST_SHLD; - - /* VPGATHER instructions are used for look-up table based - * implementations which require VPGATHER to be fast enough to beat - * regular parallelized look-up table implementations (see Twofish). 
- * So far, only Intel processors beginning with Skylake and AMD - * processors starting with Zen4 have had VPGATHER fast enough to be - * enabled. Enable VPGATHER here unconditionally as requested. */ - avoid_vpgather = 0; #endif - if ((result & HWF_INTEL_AVX2) && !avoid_vpgather) - result |= HWF_INTEL_FAST_VPGATHER; - return result; } #endif /* HAS_X86_CPUID */ diff --git a/src/hwfeatures.c b/src/hwfeatures.c index 44453ef1..41a68271 100644 --- a/src/hwfeatures.c +++ b/src/hwfeatures.c @@ -58,7 +58,6 @@ static struct { HWF_INTEL_RDRAND, "intel-rdrand" }, { HWF_INTEL_AVX, "intel-avx" }, { HWF_INTEL_AVX2, "intel-avx2" }, - { HWF_INTEL_FAST_VPGATHER, "intel-fast-vpgather" }, { HWF_INTEL_RDTSC, "intel-rdtsc" }, { HWF_INTEL_SHAEXT, "intel-shaext" }, { HWF_INTEL_VAES_VPCLMUL, "intel-vaes-vpclmul" }, -- 2.48.1