[PATCH 1/2] sm4: tail-call ARM and PPC crypt_blk1_x helpers
Jussi Kivilinna
jussi.kivilinna at iki.fi
Sat Jul 4 07:37:32 CEST 2026
* cipher/sm4-aarch64.S (sm4_aarch64_crypt_blk1_4)
(_gcry_sm4_aarch64_crypt_blk1_8): Return zero stack burn size.
* cipher/sm4-armv8-aarch64-ce.S (sm4_armv8_ce_crypt_blk1_4)
(_gcry_sm4_armv8_ce_crypt_blk1_8): Likewise.
* cipher/sm4-armv9-aarch64-sve-ce.S (_gcry_sm4_armv9_sve_ce_crypt):
Likewise.
* cipher/sm4-ppc.c (sm4_ppc_crypt_blk1_16, _gcry_sm4_ppc8le_crypt_blk1_16)
(_gcry_sm4_ppc9le_crypt_blk1_16): Likewise.
* cipher/sm4.c (_gcry_sm4_aarch64_crypt_blk1_8)
(_gcry_sm4_armv8_ce_crypt_blk1_8, _gcry_sm4_armv9_sve_ce_crypt)
(_gcry_sm4_ppc8le_crypt_blk1_16, _gcry_sm4_ppc9le_crypt_blk1_16):
Change return type from 'void' to 'unsigned int'.
(sm4_aarch64_crypt_blk1_16, sm4_armv8_ce_crypt_blk1_16)
(sm4_armv9_sve_ce_crypt_blk1_16, sm4_ppc8le_crypt_blk1_16)
(sm4_ppc9le_crypt_blk1_16): Tail-call the underlying blk1_x helper.
--
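The blk1_16 wrappers in sm4.c called the underlying helper and then
returned a constant zero, which prevented the compiler from emitting a
tail call. Make the helpers themselves return zero (the stack burn size)
so that the wrappers can return the helper's result directly.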
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/sm4-aarch64.S | 2 ++
cipher/sm4-armv8-aarch64-ce.S | 2 ++
cipher/sm4-armv9-aarch64-sve-ce.S | 1 +
cipher/sm4-ppc.c | 14 +++++-----
cipher/sm4.c | 43 ++++++++++++++-----------------
5 files changed, 33 insertions(+), 29 deletions(-)
diff --git a/cipher/sm4-aarch64.S b/cipher/sm4-aarch64.S
index bab4b4df..c71bf3ca 100644
--- a/cipher/sm4-aarch64.S
+++ b/cipher/sm4-aarch64.S
@@ -222,6 +222,7 @@ sm4_aarch64_crypt_blk1_4:
.Lblk4_store_output_done:
VPOP_ABI;
+ mov x0, #0;
ret_spec_stop;
CFI_ENDPROC();
ELF(.size sm4_aarch64_crypt_blk1_4,.-sm4_aarch64_crypt_blk1_4;)
@@ -396,6 +397,7 @@ _gcry_sm4_aarch64_crypt_blk1_8:
CFI_ADJUST_CFA_OFFSET(-16);
CFI_RESTORE(x29);
CFI_RESTORE(x30);
+ mov x0, #0;
ret_spec_stop;
CFI_ENDPROC();
ELF(.size _gcry_sm4_aarch64_crypt_blk1_8,.-_gcry_sm4_aarch64_crypt_blk1_8;)
diff --git a/cipher/sm4-armv8-aarch64-ce.S b/cipher/sm4-armv8-aarch64-ce.S
index 01f3df92..5917ad5b 100644
--- a/cipher/sm4-armv8-aarch64-ce.S
+++ b/cipher/sm4-armv8-aarch64-ce.S
@@ -335,6 +335,7 @@ sm4_armv8_ce_crypt_blk1_4:
st1 {v3.16b}, [x1];
.Lblk4_store_output_done:
+ mov x0, #0;
ret_spec_stop;
CFI_ENDPROC();
ELF(.size sm4_armv8_ce_crypt_blk1_4,.-sm4_armv8_ce_crypt_blk1_4;)
@@ -385,6 +386,7 @@ _gcry_sm4_armv8_ce_crypt_blk1_8:
.Lblk8_store_output_done:
CLEAR_ALL_REGS();
+ mov x0, #0;
ret_spec_stop;
CFI_ENDPROC();
ELF(.size _gcry_sm4_armv8_ce_crypt_blk1_8,.-_gcry_sm4_armv8_ce_crypt_blk1_8;)
diff --git a/cipher/sm4-armv9-aarch64-sve-ce.S b/cipher/sm4-armv9-aarch64-sve-ce.S
index 7367cd28..ba9bde84 100644
--- a/cipher/sm4-armv9-aarch64-sve-ce.S
+++ b/cipher/sm4-armv9-aarch64-sve-ce.S
@@ -444,6 +444,7 @@ _gcry_sm4_armv9_sve_ce_crypt:
cbnz x3, .Lcrypt_tail;
.Lcrypt_end:
+ mov x0, #0;
ret_spec_stop;
CFI_ENDPROC();
ELF(.size _gcry_sm4_armv9_sve_ce_crypt,.-_gcry_sm4_armv9_sve_ce_crypt;)
diff --git a/cipher/sm4-ppc.c b/cipher/sm4-ppc.c
index 2b26c39d..9d9227c0 100644
--- a/cipher/sm4-ppc.c
+++ b/cipher/sm4-ppc.c
@@ -293,7 +293,7 @@ sm4_ppc_crypt_blk1_4(u32 *rk, byte *out, const byte *in, size_t nblks)
vec_xst((vector16x_u8)vec_revb(ra0), 0, out + 3 * 16);
}
-static ASM_FUNC_ATTR_INLINE void
+static ASM_FUNC_ATTR_INLINE unsigned int
sm4_ppc_crypt_blk1_16(u32 *rk, byte *out, const byte *in, size_t nblks)
{
if (nblks >= 16)
@@ -321,25 +321,27 @@ sm4_ppc_crypt_blk1_16(u32 *rk, byte *out, const byte *in, size_t nblks)
}
clear_vec_regs();
+
+ return 0;
}
-ASM_FUNC_ATTR_NOINLINE FUNC_ATTR_TARGET_P8 void
+ASM_FUNC_ATTR_NOINLINE FUNC_ATTR_TARGET_P8 unsigned int
_gcry_sm4_ppc8le_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
size_t nblks)
{
- sm4_ppc_crypt_blk1_16(rk, out, in, nblks);
+ return sm4_ppc_crypt_blk1_16(rk, out, in, nblks);
}
-ASM_FUNC_ATTR_NOINLINE FUNC_ATTR_TARGET_P9 void
+ASM_FUNC_ATTR_NOINLINE FUNC_ATTR_TARGET_P9 unsigned int
_gcry_sm4_ppc9le_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
size_t nblks)
{
#ifdef HAVE_FUNC_ATTR_TARGET
/* Inline for POWER9 target optimization. */
- sm4_ppc_crypt_blk1_16(rk, out, in, nblks);
+ return sm4_ppc_crypt_blk1_16(rk, out, in, nblks);
#else
/* Target selecting not working, just call the other noinline function. */
- _gcry_sm4_ppc8le_crypt_blk1_16(rk, out, in, nblks);
+ return _gcry_sm4_ppc8le_crypt_blk1_16(rk, out, in, nblks);
#endif
}
diff --git a/cipher/sm4.c b/cipher/sm4.c
index 165a66b2..cc98b676 100644
--- a/cipher/sm4.c
+++ b/cipher/sm4.c
@@ -635,9 +635,9 @@ extern void _gcry_sm4_aarch64_cfb_dec(const u32 *rk_enc, byte *out,
byte *iv,
size_t nblocks);
-extern void _gcry_sm4_aarch64_crypt_blk1_8(u32 *rk, byte *out,
- const byte *in,
- size_t num_blocks);
+extern unsigned int _gcry_sm4_aarch64_crypt_blk1_8(u32 *rk, byte *out,
+ const byte *in,
+ size_t num_blocks);
static inline unsigned int
sm4_aarch64_crypt_blk1_16(void *rk, byte *out, const byte *in,
@@ -651,8 +651,7 @@ sm4_aarch64_crypt_blk1_16(void *rk, byte *out, const byte *in,
num_blks -= 8;
}
- _gcry_sm4_aarch64_crypt_blk1_8(rk, out, in, num_blks);
- return 0;
+ return _gcry_sm4_aarch64_crypt_blk1_8(rk, out, in, num_blks);
}
#endif /* USE_AARCH64_SIMD */
@@ -686,9 +685,9 @@ extern void _gcry_sm4_armv8_ce_xts_crypt(const u32 *rk, byte *out,
byte *tweak,
size_t nblocks);
-extern void _gcry_sm4_armv8_ce_crypt_blk1_8(u32 *rk, byte *out,
- const byte *in,
- size_t num_blocks);
+extern unsigned int _gcry_sm4_armv8_ce_crypt_blk1_8(u32 *rk, byte *out,
+ const byte *in,
+ size_t num_blocks);
static inline unsigned int
sm4_armv8_ce_crypt_blk1_16(void *rk, byte *out, const byte *in,
@@ -702,16 +701,15 @@ sm4_armv8_ce_crypt_blk1_16(void *rk, byte *out, const byte *in,
num_blks -= 8;
}
- _gcry_sm4_armv8_ce_crypt_blk1_8(rk, out, in, num_blks);
- return 0;
+ return _gcry_sm4_armv8_ce_crypt_blk1_8(rk, out, in, num_blks);
}
#endif /* USE_ARM_CE */
#ifdef USE_ARM_SVE_CE
-extern void _gcry_sm4_armv9_sve_ce_crypt(u32 *rk, byte *out,
- const byte *in,
- size_t nblocks);
+extern unsigned int _gcry_sm4_armv9_sve_ce_crypt(u32 *rk, byte *out,
+ const byte *in,
+ size_t nblocks);
extern void _gcry_sm4_armv9_sve_ce_ctr_enc(const u32 *rk_enc, byte *out,
const byte *in,
@@ -732,32 +730,31 @@ static inline unsigned int
sm4_armv9_sve_ce_crypt_blk1_16(void *rk, byte *out, const byte *in,
size_t num_blks)
{
- _gcry_sm4_armv9_sve_ce_crypt(rk, out, in, num_blks);
- return 0;
+ return _gcry_sm4_armv9_sve_ce_crypt(rk, out, in, num_blks);
}
extern unsigned int _gcry_sm4_armv9_sve_get_vl(void);
#endif /* USE_ARM_SVE_CE */
#ifdef USE_PPC_CRYPTO
-extern void _gcry_sm4_ppc8le_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
- size_t num_blks);
+extern unsigned int _gcry_sm4_ppc8le_crypt_blk1_16(u32 *rk, byte *out,
+ const byte *in,
+ size_t num_blks);
-extern void _gcry_sm4_ppc9le_crypt_blk1_16(u32 *rk, byte *out, const byte *in,
- size_t num_blks);
+extern unsigned int _gcry_sm4_ppc9le_crypt_blk1_16(u32 *rk, byte *out,
+ const byte *in,
+ size_t num_blks);
static inline unsigned int
sm4_ppc8le_crypt_blk1_16(void *rk, byte *out, const byte *in, size_t num_blks)
{
- _gcry_sm4_ppc8le_crypt_blk1_16(rk, out, in, num_blks);
- return 0;
+ return _gcry_sm4_ppc8le_crypt_blk1_16(rk, out, in, num_blks);
}
static inline unsigned int
sm4_ppc9le_crypt_blk1_16(void *rk, byte *out, const byte *in, size_t num_blks)
{
- _gcry_sm4_ppc9le_crypt_blk1_16(rk, out, in, num_blks);
- return 0;
+ return _gcry_sm4_ppc9le_crypt_blk1_16(rk, out, in, num_blks);
}
#endif /* USE_PPC_CRYPTO */
--
2.53.0
More information about the Gcrypt-devel
mailing list