[PATCH] Fix carry flag clobber for powerpc inline assembly with clang

Jussi Kivilinna jussi.kivilinna at iki.fi
Wed May 13 07:17:29 CEST 2026


* mpi/longlong.h [_ARCH_PPC || _ARCH_PPC64] (__PPC_CLOBBER_CC): New.
(add_ssaaaa, sub_ddmmss): Use it so that "xer" is in the clobber list.
* mpi/ec-inline.h [__powerpc__] (ADD3_LIMB64, SUB3_LIMB64)
(ADD4_LIMB64, SUB4_LIMB64, ADD5_LIMB64, SUB5_LIMB64): Add "xer" to
clobber list.
* cipher/poly1305.c [__powerpc__] (ADD_1305_64): Likewise.
--

Add "xer" to the inline assembly clobber lists, as the carry flag is
passed through the XER register on PowerPC. With GCC, "cc" clobbers XER
implicitly, but with clang XER needs to be clobbered explicitly.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/poly1305.c |  2 +-
 mpi/ec-inline.h   | 12 ++++++------
 mpi/longlong.h    | 38 ++++++++++++++++++++++----------------
 3 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/cipher/poly1305.c b/cipher/poly1305.c
index 8bc65699..8739a4ef 100644
--- a/cipher/poly1305.c
+++ b/cipher/poly1305.c
@@ -194,7 +194,7 @@ static void poly1305_init (poly1305_context_t *ctx,
 	       "adde %2, %5, %2\n" \
 	       : "+r" (A0), "+r" (A1), "+r" (A2) \
 	       : "r" (B0), "r" (B1), "r" (B2) \
-	       : "cc" )
+	       : "cc", "xer" )
 
 #endif /* __powerpc__ */
 
diff --git a/mpi/ec-inline.h b/mpi/ec-inline.h
index 662aa5c3..9edd20e5 100644
--- a/mpi/ec-inline.h
+++ b/mpi/ec-inline.h
@@ -322,7 +322,7 @@ LIMB64_HILO(u32 hi, u32 lo)
 	     "r" ((mpi_limb_t)(C2)), \
 	     "r" ((mpi_limb_t)(C1)), \
 	     "r" ((mpi_limb_t)(C0)) \
-	   : "cc", "r0")
+	   : "cc", "xer", "r0")
 
 #define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
   __asm__ ("subfc %2, %8, %5\n" \
@@ -337,7 +337,7 @@ LIMB64_HILO(u32 hi, u32 lo)
 	     "r" ((mpi_limb_t)(C2)), \
 	     "r" ((mpi_limb_t)(C1)), \
 	     "r" ((mpi_limb_t)(C0)) \
-	   : "cc", "r0")
+	   : "cc", "xer", "r0")
 
 #define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
   __asm__ ("addc %3, %11, %7\n" \
@@ -356,7 +356,7 @@ LIMB64_HILO(u32 hi, u32 lo)
 	     "r" ((mpi_limb_t)(C2)), \
 	     "r" ((mpi_limb_t)(C1)), \
 	     "r" ((mpi_limb_t)(C0)) \
-	   : "cc")
+	   : "cc", "xer")
 
 #define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
   __asm__ ("subfc %3, %11, %7\n" \
@@ -375,7 +375,7 @@ LIMB64_HILO(u32 hi, u32 lo)
 	     "r" ((mpi_limb_t)(C2)), \
 	     "r" ((mpi_limb_t)(C1)), \
 	     "r" ((mpi_limb_t)(C0)) \
-	   : "cc")
+	   : "cc", "xer")
 
 #define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
 	                    C4, C3, C2, C1, C0) \
@@ -399,7 +399,7 @@ LIMB64_HILO(u32 hi, u32 lo)
 	     "r" ((mpi_limb_t)(C2)), \
 	     "r" ((mpi_limb_t)(C1)), \
 	     "r" ((mpi_limb_t)(C0)) \
-	   : "cc")
+	   : "cc", "xer")
 
 #define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
 	                    C4, C3, C2, C1, C0) \
@@ -423,7 +423,7 @@ LIMB64_HILO(u32 hi, u32 lo)
 	     "r" ((mpi_limb_t)(C2)), \
 	     "r" ((mpi_limb_t)(C1)), \
 	     "r" ((mpi_limb_t)(C0)) \
-	   : "cc")
+	   : "cc", "xer")
 
 #endif /* __powerpc__ */
 
diff --git a/mpi/longlong.h b/mpi/longlong.h
index 46de33a8..453f2704 100644
--- a/mpi/longlong.h
+++ b/mpi/longlong.h
@@ -981,6 +981,12 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
 /***************************************
  **************  PPC  ******************
  ***************************************/
+#if __GNUC__ >= 2 && (defined (_ARCH_PPC) || defined (_ARCH_PPC64) \
+    || defined (__powerpc__) || defined (__powerpc64__))
+# define __PPC_CLOBBER_CC : "cc", "xer"
+#else
+# define __PPC_CLOBBER_CC
+#endif
 /* Powerpc 32 bit support taken from GCC longlong.h. */
 #if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 32
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
@@ -988,40 +994,40 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
     if (__builtin_constant_p (bh) && (bh) == 0)				\
       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)	\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)	\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
     else								\
       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
               : "=r" (sh), "=&r" (sl)					\
               : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)		\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
   } while (0)
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (ah) && (ah) == 0)				\
       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)	\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)		\
       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)	\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
     else if (__builtin_constant_p (bh) && (bh) == 0)			\
       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)	\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)	\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
     else								\
       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"			\
               : "=r" (sh), "=&r" (sl)					\
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)			\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
   } while (0)
 # define count_leading_zeros(count, x) \
   __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
@@ -1052,40 +1058,40 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
     if (__builtin_constant_p (bh) && (bh) == 0)				\
       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)	\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)	\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
     else								\
       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
               : "=r" (sh), "=&r" (sl)					\
               : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)		\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
   } while (0)
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (ah) && (ah) == 0)				\
       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)	\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)		\
       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)	\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
     else if (__builtin_constant_p (bh) && (bh) == 0)			\
       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)	\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)	\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
     else								\
       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"			\
               : "=r" (sh), "=&r" (sl)					\
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)			\
-              __CLOBBER_CC);						\
+              __PPC_CLOBBER_CC);					\
   } while (0)
 # define count_leading_zeros(count, x) \
   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
-- 
2.53.0




More information about the Gcrypt-devel mailing list