[PATCH 1/2] sm4: tail-call ARM and PPC crypt_blk1_x helpers

Jussi Kivilinna jussi.kivilinna at iki.fi
Sat Jul 4 07:37:32 CEST 2026


* cipher/sm4-aarch64.S (sm4_aarch64_crypt_blk1_4)
(_gcry_sm4_aarch64_crypt_blk1_8): Return zero stack burn size.
* cipher/sm4-armv8-aarch64-ce.S (sm4_armv8_ce_crypt_blk1_4)
(_gcry_sm4_armv8_ce_crypt_blk1_8): Likewise.
* cipher/sm4-armv9-aarch64-sve-ce.S (_gcry_sm4_armv9_sve_ce_crypt):
Likewise.
* cipher/sm4-ppc.c (sm4_ppc_crypt_blk1_16, _gcry_sm4_ppc8le_crypt_blk1_16)
(_gcry_sm4_ppc9le_crypt_blk1_16): Likewise.
* cipher/sm4.c (_gcry_sm4_aarch64_crypt_blk1_8)
(_gcry_sm4_armv8_ce_crypt_blk1_8, _gcry_sm4_armv9_sve_ce_crypt)
(_gcry_sm4_ppc8le_crypt_blk1_16, _gcry_sm4_ppc9le_crypt_blk1_16):
Change return type of prototypes from 'void' to 'unsigned int'.
(sm4_aarch64_crypt_blk1_16, sm4_armv8_ce_crypt_blk1_16)
(sm4_armv9_sve_ce_crypt_blk1_16, sm4_ppc8le_crypt_blk1_16)
(sm4_ppc9le_crypt_blk1_16): Tail-call the underlying blk1_x helper.
--

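The blk1_16 wrappers in cipher/sm4.c called the underlying helper and
then returned a constant zero stack burn size, which prevented the
compiler from emitting a tail call.  Make the helpers return zero
themselves so that the wrappers can return the helper's result directly.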

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/sm4-aarch64.S              |  2 ++
 cipher/sm4-armv8-aarch64-ce.S     |  2 ++
 cipher/sm4-armv9-aarch64-sve-ce.S |  1 +
 cipher/sm4-ppc.c                  | 14 +++++-----
 cipher/sm4.c                      | 43 ++++++++++++++-----------------
 5 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/cipher/sm4-aarch64.S b/cipher/sm4-aarch64.S
index bab4b4df..c71bf3ca 100644
--- a/cipher/sm4-aarch64.S
+++ b/cipher/sm4-aarch64.S
@@ -222,6 +222,7 @@ sm4_aarch64_crypt_blk1_4:
 
 .Lblk4_store_output_done:
     VPOP_ABI;
+    mov x0, #0;
     ret_spec_stop;
     CFI_ENDPROC();
 ELF(.size sm4_aarch64_crypt_blk1_4,.-sm4_aarch64_crypt_blk1_4;)
@@ -396,6 +397,7 @@ _gcry_sm4_aarch64_crypt_blk1_8:
     CFI_ADJUST_CFA_OFFSET(-16);
     CFI_RESTORE(x29);
     CFI_RESTORE(x30);
+    mov x0, #0;
     ret_spec_stop;
     CFI_ENDPROC();
 ELF(.size _gcry_sm4_aarch64_crypt_blk1_8,.-_gcry_sm4_aarch64_crypt_blk1_8;)
diff --git a/cipher/sm4-armv8-aarch64-ce.S b/cipher/sm4-armv8-aarch64-ce.S
index 01f3df92..5917ad5b 100644
--- a/cipher/sm4-armv8-aarch64-ce.S
+++ b/cipher/sm4-armv8-aarch64-ce.S
@@ -335,6 +335,7 @@ sm4_armv8_ce_crypt_blk1_4:
     st1 {v3.16b}, [x1];
 
 .Lblk4_store_output_done:
+    mov x0, #0;
     ret_spec_stop;
     CFI_ENDPROC();
 ELF(.size sm4_armv8_ce_crypt_blk1_4,.-sm4_armv8_ce_crypt_blk1_4;)
@@ -385,6 +386,7 @@ _gcry_sm4_armv8_ce_crypt_blk1_8:
 
 .Lblk8_store_output_done:
     CLEAR_ALL_REGS();
+    mov x0, #0;
     ret_spec_stop;
     CFI_ENDPROC();
 ELF(.size _gcry_sm4_armv8_ce_crypt_blk1_8,.-_gcry_sm4_armv8_ce_crypt_blk1_8;)
diff --git a/cipher/sm4-armv9-aarch64-sve-ce.S b/cipher/sm4-armv9-aarch64-sve-ce.S
index 7367cd28..ba9bde84 100644
--- a/cipher/sm4-armv9-aarch64-sve-ce.S
+++ b/cipher/sm4-armv9-aarch64-sve-ce.S
@@ -444,6 +444,7 @@ _gcry_sm4_armv9_sve_ce_crypt:
     cbnz        x3, .Lcrypt_tail;
 
 .Lcrypt_end:
+    mov         x0, #0;
     ret_spec_stop;
     CFI_ENDPROC();
 ELF(.size _gcry_sm4_armv9_sve_ce_crypt,.-_gcry_sm4_armv9_sve_ce_crypt;)
diff --git a/cipher/sm4-ppc.c b/cipher/sm4-ppc.c
index 2b26c39d..9d9227c0 100644
--- a/cipher/sm4-ppc.c
+++ b/cipher/sm4-ppc.c
@@ -293,7 +293,7 @@ sm4_ppc_crypt_blk1_4(u32 *rk, byte *out, const byte *in, size_t nblks)
     vec_xst((vector16x_u8)vec_revb(ra0), 0, out + 3 * 16);
 }
 
-static ASM_FUNC_ATTR_INLINE void
+static ASM_FUNC_ATTR_INLINE unsigned int
 sm4_ppc_crypt_blk1_16(u32 *rk, byte *out, const byte *in, size_t nblks)
 {
   if (nblks >= 16)
@@ -321,25 +321,27 @@ sm4_ppc_crypt_blk1_16(u32 *rk, byte *out, const byte *in, size_t nblks)
     }
 
   clear_vec_regs();
+
+  return 0;
 }
 
-ASM_FUNC_ATTR_NOINLINE FUNC_ATTR_TARGET_P8 void
+ASM_FUNC_ATTR_NOINLINE FUNC_ATTR_TARGET_P8 unsigned int
 _gcry_sm4_ppc8le_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
 			       size_t nblks)
 {
-  sm4_ppc_crypt_blk1_16(rk, out, in, nblks);
+  return sm4_ppc_crypt_blk1_16(rk, out, in, nblks);
 }
 
-ASM_FUNC_ATTR_NOINLINE FUNC_ATTR_TARGET_P9 void
+ASM_FUNC_ATTR_NOINLINE FUNC_ATTR_TARGET_P9 unsigned int
 _gcry_sm4_ppc9le_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
 			       size_t nblks)
 {
 #ifdef HAVE_FUNC_ATTR_TARGET
   /* Inline for POWER9 target optimization. */
-  sm4_ppc_crypt_blk1_16(rk, out, in, nblks);
+  return sm4_ppc_crypt_blk1_16(rk, out, in, nblks);
 #else
   /* Target selecting not working, just call the other noinline function. */
-  _gcry_sm4_ppc8le_crypt_blk1_16(rk, out, in, nblks);
+  return _gcry_sm4_ppc8le_crypt_blk1_16(rk, out, in, nblks);
 #endif
 }
 
diff --git a/cipher/sm4.c b/cipher/sm4.c
index 165a66b2..cc98b676 100644
--- a/cipher/sm4.c
+++ b/cipher/sm4.c
@@ -635,9 +635,9 @@ extern void _gcry_sm4_aarch64_cfb_dec(const u32 *rk_enc, byte *out,
 				      byte *iv,
 				      size_t nblocks);
 
-extern void _gcry_sm4_aarch64_crypt_blk1_8(u32 *rk, byte *out,
-					   const byte *in,
-					   size_t num_blocks);
+extern unsigned int _gcry_sm4_aarch64_crypt_blk1_8(u32 *rk, byte *out,
+						   const byte *in,
+						   size_t num_blocks);
 
 static inline unsigned int
 sm4_aarch64_crypt_blk1_16(void *rk, byte *out, const byte *in,
@@ -651,8 +651,7 @@ sm4_aarch64_crypt_blk1_16(void *rk, byte *out, const byte *in,
       num_blks -= 8;
     }
 
-  _gcry_sm4_aarch64_crypt_blk1_8(rk, out, in, num_blks);
-  return 0;
+  return _gcry_sm4_aarch64_crypt_blk1_8(rk, out, in, num_blks);
 }
 
 #endif /* USE_AARCH64_SIMD */
@@ -686,9 +685,9 @@ extern void _gcry_sm4_armv8_ce_xts_crypt(const u32 *rk, byte *out,
 					 byte *tweak,
 					 size_t nblocks);
 
-extern void _gcry_sm4_armv8_ce_crypt_blk1_8(u32 *rk, byte *out,
-					    const byte *in,
-					    size_t num_blocks);
+extern unsigned int _gcry_sm4_armv8_ce_crypt_blk1_8(u32 *rk, byte *out,
+						    const byte *in,
+						    size_t num_blocks);
 
 static inline unsigned int
 sm4_armv8_ce_crypt_blk1_16(void *rk, byte *out, const byte *in,
@@ -702,16 +701,15 @@ sm4_armv8_ce_crypt_blk1_16(void *rk, byte *out, const byte *in,
       num_blks -= 8;
     }
 
-  _gcry_sm4_armv8_ce_crypt_blk1_8(rk, out, in, num_blks);
-  return 0;
+  return _gcry_sm4_armv8_ce_crypt_blk1_8(rk, out, in, num_blks);
 }
 
 #endif /* USE_ARM_CE */
 
 #ifdef USE_ARM_SVE_CE
-extern void _gcry_sm4_armv9_sve_ce_crypt(u32 *rk, byte *out,
-					 const byte *in,
-					 size_t nblocks);
+extern unsigned int _gcry_sm4_armv9_sve_ce_crypt(u32 *rk, byte *out,
+						 const byte *in,
+						 size_t nblocks);
 
 extern void _gcry_sm4_armv9_sve_ce_ctr_enc(const u32 *rk_enc, byte *out,
 					   const byte *in,
@@ -732,32 +730,31 @@ static inline unsigned int
 sm4_armv9_sve_ce_crypt_blk1_16(void *rk, byte *out, const byte *in,
 			       size_t num_blks)
 {
-  _gcry_sm4_armv9_sve_ce_crypt(rk, out, in, num_blks);
-  return 0;
+  return _gcry_sm4_armv9_sve_ce_crypt(rk, out, in, num_blks);
 }
 
 extern unsigned int _gcry_sm4_armv9_sve_get_vl(void);
 #endif /* USE_ARM_SVE_CE */
 
 #ifdef USE_PPC_CRYPTO
-extern void _gcry_sm4_ppc8le_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
-					   size_t num_blks);
+extern unsigned int _gcry_sm4_ppc8le_crypt_blk1_16(u32 *rk, byte *out,
+						   const byte *in,
+						   size_t num_blks);
 
-extern void _gcry_sm4_ppc9le_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
-					   size_t num_blks);
+extern unsigned int _gcry_sm4_ppc9le_crypt_blk1_16(u32 *rk, byte *out,
+						   const byte *in,
+						   size_t num_blks);
 
 static inline unsigned int
 sm4_ppc8le_crypt_blk1_16(void *rk, byte *out, const byte *in, size_t num_blks)
 {
-  _gcry_sm4_ppc8le_crypt_blk1_16(rk, out, in, num_blks);
-  return 0;
+  return _gcry_sm4_ppc8le_crypt_blk1_16(rk, out, in, num_blks);
 }
 
 static inline unsigned int
 sm4_ppc9le_crypt_blk1_16(void *rk, byte *out, const byte *in, size_t num_blks)
 {
-  _gcry_sm4_ppc9le_crypt_blk1_16(rk, out, in, num_blks);
-  return 0;
+  return _gcry_sm4_ppc9le_crypt_blk1_16(rk, out, in, num_blks);
 }
 #endif /* USE_PPC_CRYPTO */
 
-- 
2.53.0




More information about the Gcrypt-devel mailing list