[PATCH] hwf-x86: remove unused HWF_INTEL_FAST_VPGATHER
Jussi Kivilinna
jussi.kivilinna at iki.fi
Wed Sep 17 19:37:11 CEST 2025
* cipher/twofish-avx2-amd64.S (RNOT): Update comment on usage of this register
macro.
* doc/gcrypt.texi: Remove 'intel-fast-vpgather'.
* src/g10lib.h (HWF_INTEL_FAST_VPGATHER): Remove.
(HWF_INTEL_RDTSC, HWF_INTEL_SHAEXT, HWF_INTEL_VAES_VPCLMUL, HWF_INTEL_AVX512)
(HWF_INTEL_GFNI): Shift bit position to close the gap left by FAST_VPGATHER
removal.
* src/hwf-x86.c (detect_x86_gnuc): Remove HWF_INTEL_FAST_VPGATHER detection.
* src/hwfeatures.c (hwflist): Remove 'intel-fast-vpgather'.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/twofish-avx2-amd64.S | 2 +-
doc/gcrypt.texi | 1 -
src/g10lib.h | 11 +++---
src/hwf-x86.c | 68 +------------------------------------
src/hwfeatures.c | 1 -
5 files changed, 7 insertions(+), 76 deletions(-)
diff --git a/cipher/twofish-avx2-amd64.S b/cipher/twofish-avx2-amd64.S
index 3f61f87b..67c1e52e 100644
--- a/cipher/twofish-avx2-amd64.S
+++ b/cipher/twofish-avx2-amd64.S
@@ -89,7 +89,7 @@
#define RTMP4 RT1
#define RTMP4x RT1x
-/* vpgatherdd mask and '-1' */
+/* all bits set mask and '-1' */
#define RNOT %ymm6
#define RNOTx %xmm6
diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi
index 9adf020f..e2b8223a 100644
--- a/doc/gcrypt.texi
+++ b/doc/gcrypt.texi
@@ -581,7 +581,6 @@ are
@item intel-rdrand
@item intel-avx
@item intel-avx2
-@item intel-fast-vpgather
@item intel-rdtsc
@item intel-shaext
@item intel-vaes-vpclmul
diff --git a/src/g10lib.h b/src/g10lib.h
index 68ce5405..770ae344 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -234,12 +234,11 @@ char **_gcry_strtokenize (const char *string, const char *delim);
#define HWF_INTEL_RDRAND (1 << 11)
#define HWF_INTEL_AVX (1 << 12)
#define HWF_INTEL_AVX2 (1 << 13)
-#define HWF_INTEL_FAST_VPGATHER (1 << 14)
-#define HWF_INTEL_RDTSC (1 << 15)
-#define HWF_INTEL_SHAEXT (1 << 16)
-#define HWF_INTEL_VAES_VPCLMUL (1 << 17)
-#define HWF_INTEL_AVX512 (1 << 18)
-#define HWF_INTEL_GFNI (1 << 19)
+#define HWF_INTEL_RDTSC (1 << 14)
+#define HWF_INTEL_SHAEXT (1 << 15)
+#define HWF_INTEL_VAES_VPCLMUL (1 << 16)
+#define HWF_INTEL_AVX512 (1 << 17)
+#define HWF_INTEL_GFNI (1 << 18)
#elif defined(HAVE_CPU_ARCH_ARM)
diff --git a/src/hwf-x86.c b/src/hwf-x86.c
index d9d34c0e..54af1c83 100644
--- a/src/hwf-x86.c
+++ b/src/hwf-x86.c
@@ -197,7 +197,6 @@ detect_x86_gnuc (void)
unsigned int max_cpuid_level;
unsigned int fms, family, model;
unsigned int result = 0;
- unsigned int avoid_vpgather = 0;
unsigned int is_amd_cpu = 0;
(void)os_supports_avx_avx2_registers;
@@ -255,6 +254,7 @@ detect_x86_gnuc (void)
{
/* This is an AMD CPU. */
is_amd_cpu = 1;
+ (void)is_amd_cpu;
}
/* Detect Intel features, that might also be supported by other
@@ -411,61 +411,6 @@ detect_x86_gnuc (void)
result |= HWF_INTEL_FAST_SHLD;
break;
}
-
- /* These Intel Core processors that have AVX2 have slow VPGATHER and
- * should be avoided for table-lookup use. */
- switch (model)
- {
- case 0x3C:
- case 0x3F:
- case 0x45:
- case 0x46:
- /* Haswell */
- avoid_vpgather |= 1;
- break;
- }
-
- /* These Intel Core processors (skylake to tigerlake) have slow VPGATHER
- * because of mitigation introduced by new microcode (2023-08-08) for
- * "Downfall" speculative execution vulnerability. */
- switch (model)
- {
- /* Skylake, Cascade Lake, Cooper Lake */
- case 0x4E:
- case 0x5E:
- case 0x55:
- /* Kaby Lake, Coffee Lake, Whiskey Lake, Amber Lake */
- case 0x8E:
- case 0x9E:
- /* Cannon Lake */
- case 0x66:
- /* Comet Lake */
- case 0xA5:
- case 0xA6:
- /* Ice Lake */
- case 0x7E:
- case 0x6A:
- case 0x6C:
- /* Tiger Lake */
- case 0x8C:
- case 0x8D:
- /* Rocket Lake */
- case 0xA7:
- avoid_vpgather |= 1;
- break;
- }
- }
- else if (is_amd_cpu)
- {
- /* Non-AVX512 AMD CPUs (pre-Zen4) have slow VPGATHER and should be
- * avoided for table-lookup use. */
- avoid_vpgather |= !(result & HWF_INTEL_AVX512);
- }
- else
- {
- /* Avoid VPGATHER for non-Intel/non-AMD CPUs as testing is needed to
- * make sure it is fast enough. */
- avoid_vpgather |= 1;
}
#ifdef ENABLE_FORCE_SOFT_HWFEATURES
@@ -483,19 +428,8 @@ detect_x86_gnuc (void)
* only for those Intel processors that benefit from the SHLD
* instruction. Enabled here unconditionally as requested. */
result |= HWF_INTEL_FAST_SHLD;
-
- /* VPGATHER instructions are used for look-up table based
- * implementations which require VPGATHER to be fast enough to beat
- * regular parallelized look-up table implementations (see Twofish).
- * So far, only Intel processors beginning with Skylake and AMD
- * processors starting with Zen4 have had VPGATHER fast enough to be
- * enabled. Enable VPGATHER here unconditionally as requested. */
- avoid_vpgather = 0;
#endif
- if ((result & HWF_INTEL_AVX2) && !avoid_vpgather)
- result |= HWF_INTEL_FAST_VPGATHER;
-
return result;
}
#endif /* HAS_X86_CPUID */
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index 44453ef1..41a68271 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -58,7 +58,6 @@ static struct
{ HWF_INTEL_RDRAND, "intel-rdrand" },
{ HWF_INTEL_AVX, "intel-avx" },
{ HWF_INTEL_AVX2, "intel-avx2" },
- { HWF_INTEL_FAST_VPGATHER, "intel-fast-vpgather" },
{ HWF_INTEL_RDTSC, "intel-rdtsc" },
{ HWF_INTEL_SHAEXT, "intel-shaext" },
{ HWF_INTEL_VAES_VPCLMUL, "intel-vaes-vpclmul" },
--
2.48.1
More information about the Gcrypt-devel
mailing list