[PATCH] hwf-x86: remove unused HWF_INTEL_FAST_VPGATHER

Jussi Kivilinna jussi.kivilinna at iki.fi
Wed Sep 17 19:37:11 CEST 2025


* cipher/twofish-avx2-amd64.S (RNOT): Update comment on usage of this register
macro.
* doc/gcrypt.texi: Remove 'intel-fast-vpgather'.
* src/g10lib.h (HWF_INTEL_FAST_VPGATHER): Remove.
(HWF_INTEL_RDTSC, HWF_INTEL_SHAEXT, HWF_INTEL_VAES_VPCLMUL, HWF_INTEL_AVX512)
(HWF_INTEL_GFNI): Shift bit position to close the gap left by FAST_VPGATHER
removal.
* src/hwf-x86.c (detect_x86_gnuc): Remove HWF_INTEL_FAST_VPGATHER detection.
* src/hwfeatures.c (hwflist): Remove 'intel-fast-vpgather'.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/twofish-avx2-amd64.S |  2 +-
 doc/gcrypt.texi             |  1 -
 src/g10lib.h                | 11 +++---
 src/hwf-x86.c               | 68 +------------------------------------
 src/hwfeatures.c            |  1 -
 5 files changed, 7 insertions(+), 76 deletions(-)

diff --git a/cipher/twofish-avx2-amd64.S b/cipher/twofish-avx2-amd64.S
index 3f61f87b..67c1e52e 100644
--- a/cipher/twofish-avx2-amd64.S
+++ b/cipher/twofish-avx2-amd64.S
@@ -89,7 +89,7 @@
 #define RTMP4   RT1
 #define RTMP4x  RT1x
 
-/* vpgatherdd mask and '-1' */
+/* all bits set mask and '-1' */
 #define RNOT	%ymm6
 #define RNOTx	%xmm6
 
diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi
index 9adf020f..e2b8223a 100644
--- a/doc/gcrypt.texi
+++ b/doc/gcrypt.texi
@@ -581,7 +581,6 @@ are
 @item intel-rdrand
 @item intel-avx
 @item intel-avx2
-@item intel-fast-vpgather
 @item intel-rdtsc
 @item intel-shaext
 @item intel-vaes-vpclmul
diff --git a/src/g10lib.h b/src/g10lib.h
index 68ce5405..770ae344 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -234,12 +234,11 @@ char **_gcry_strtokenize (const char *string, const char *delim);
 #define HWF_INTEL_RDRAND        (1 << 11)
 #define HWF_INTEL_AVX           (1 << 12)
 #define HWF_INTEL_AVX2          (1 << 13)
-#define HWF_INTEL_FAST_VPGATHER (1 << 14)
-#define HWF_INTEL_RDTSC         (1 << 15)
-#define HWF_INTEL_SHAEXT        (1 << 16)
-#define HWF_INTEL_VAES_VPCLMUL  (1 << 17)
-#define HWF_INTEL_AVX512        (1 << 18)
-#define HWF_INTEL_GFNI          (1 << 19)
+#define HWF_INTEL_RDTSC         (1 << 14)
+#define HWF_INTEL_SHAEXT        (1 << 15)
+#define HWF_INTEL_VAES_VPCLMUL  (1 << 16)
+#define HWF_INTEL_AVX512        (1 << 17)
+#define HWF_INTEL_GFNI          (1 << 18)
 
 #elif defined(HAVE_CPU_ARCH_ARM)
 
diff --git a/src/hwf-x86.c b/src/hwf-x86.c
index d9d34c0e..54af1c83 100644
--- a/src/hwf-x86.c
+++ b/src/hwf-x86.c
@@ -197,7 +197,6 @@ detect_x86_gnuc (void)
   unsigned int max_cpuid_level;
   unsigned int fms, family, model;
   unsigned int result = 0;
-  unsigned int avoid_vpgather = 0;
   unsigned int is_amd_cpu = 0;
 
   (void)os_supports_avx_avx2_registers;
@@ -255,6 +254,7 @@ detect_x86_gnuc (void)
     {
       /* This is an AMD CPU.  */
       is_amd_cpu = 1;
+      (void)is_amd_cpu;
     }
 
   /* Detect Intel features, that might also be supported by other
@@ -411,61 +411,6 @@ detect_x86_gnuc (void)
 	  result |= HWF_INTEL_FAST_SHLD;
 	  break;
 	}
-
-      /* These Intel Core processors that have AVX2 have slow VPGATHER and
-       * should be avoided for table-lookup use. */
-      switch (model)
-	{
-	case 0x3C:
-	case 0x3F:
-	case 0x45:
-	case 0x46:
-	  /* Haswell */
-	  avoid_vpgather |= 1;
-	  break;
-	}
-
-      /* These Intel Core processors (skylake to tigerlake) have slow VPGATHER
-       * because of mitigation introduced by new microcode (2023-08-08) for
-       * "Downfall" speculative execution vulnerability. */
-      switch (model)
-	{
-	/* Skylake, Cascade Lake, Cooper Lake */
-	case 0x4E:
-	case 0x5E:
-	case 0x55:
-	/* Kaby Lake, Coffee Lake, Whiskey Lake, Amber Lake */
-	case 0x8E:
-	case 0x9E:
-	/* Cannon Lake */
-	case 0x66:
-	/* Comet Lake */
-	case 0xA5:
-	case 0xA6:
-	/* Ice Lake */
-	case 0x7E:
-	case 0x6A:
-	case 0x6C:
-	/* Tiger Lake */
-	case 0x8C:
-	case 0x8D:
-	/* Rocket Lake */
-	case 0xA7:
-	  avoid_vpgather |= 1;
-	  break;
-	}
-    }
-  else if (is_amd_cpu)
-    {
-      /* Non-AVX512 AMD CPUs (pre-Zen4) have slow VPGATHER and should be
-       * avoided for table-lookup use. */
-      avoid_vpgather |= !(result & HWF_INTEL_AVX512);
-    }
-  else
-    {
-      /* Avoid VPGATHER for non-Intel/non-AMD CPUs as testing is needed to
-       * make sure it is fast enough. */
-      avoid_vpgather |= 1;
     }
 
 #ifdef ENABLE_FORCE_SOFT_HWFEATURES
@@ -483,19 +428,8 @@ detect_x86_gnuc (void)
    * only for those Intel processors that benefit from the SHLD
    * instruction. Enabled here unconditionally as requested. */
   result |= HWF_INTEL_FAST_SHLD;
-
-  /* VPGATHER instructions are used for look-up table based
-   * implementations which require VPGATHER to be fast enough to beat
-   * regular parallelized look-up table implementations (see Twofish).
-   * So far, only Intel processors beginning with Skylake and AMD
-   * processors starting with Zen4 have had VPGATHER fast enough to be
-   * enabled. Enable VPGATHER here unconditionally as requested. */
-  avoid_vpgather = 0;
 #endif
 
-  if ((result & HWF_INTEL_AVX2) && !avoid_vpgather)
-    result |= HWF_INTEL_FAST_VPGATHER;
-
   return result;
 }
 #endif /* HAS_X86_CPUID */
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index 44453ef1..41a68271 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -58,7 +58,6 @@ static struct
     { HWF_INTEL_RDRAND,        "intel-rdrand" },
     { HWF_INTEL_AVX,           "intel-avx" },
     { HWF_INTEL_AVX2,          "intel-avx2" },
-    { HWF_INTEL_FAST_VPGATHER, "intel-fast-vpgather" },
     { HWF_INTEL_RDTSC,         "intel-rdtsc" },
     { HWF_INTEL_SHAEXT,        "intel-shaext" },
     { HWF_INTEL_VAES_VPCLMUL,  "intel-vaes-vpclmul" },
-- 
2.48.1




More information about the Gcrypt-devel mailing list