[PATCH 3/8] camellia-aesni-avx: add acceleration for ECB/XTS/CTR32LE modes
Jussi Kivilinna
jussi.kivilinna at iki.fi
Wed Feb 22 20:29:19 CET 2023
* cipher/camellia-aesni-avx-amd64.S (_gcry_camellia_aesni_avx_ecb_enc)
(_gcry_camellia_aesni_avx_ecb_dec): New.
* cipher/camellia-glue.c (_gcry_camellia_aesni_avx_ecb_enc)
(_gcry_camellia_aesni_avx_ecb_dec): New.
(camellia_setkey): Always enable XTS/ECB/CTR32LE bulk functions.
(camellia_encrypt_blk1_32, camellia_decrypt_blk1_32)
[USE_AESNI_AVX]: Add AESNI/AVX code-path.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/camellia-aesni-avx-amd64.S | 92 +++++++++++++++++++++++++++++++
cipher/camellia-glue.c | 59 ++++++++++++++------
2 files changed, 133 insertions(+), 18 deletions(-)
diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S
index 1f241e03..93c96791 100644
--- a/cipher/camellia-aesni-avx-amd64.S
+++ b/cipher/camellia-aesni-avx-amd64.S
@@ -1029,6 +1029,98 @@ _gcry_camellia_aesni_avx_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)
+.align 16
+.globl _gcry_camellia_aesni_avx_ecb_enc
+ELF(.type _gcry_camellia_aesni_avx_ecb_enc,@function;)
+
+_gcry_camellia_aesni_avx_ecb_enc:
+ /* ECB-encrypt one batch of 16 blocks. input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp; /* %rbp keeps the entry %rsp; undone by 'leave' */
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ cmpl $128, key_bitlength(CTX); /* flags: is this a 128-bit key? */
+ movl $32, %r8d; /* default; movl leaves the flags untouched */
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max: 24 for 128-bit keys, else 32 */
+
+ inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rdx, (key_table)(CTX));
+
+ subq $(16 * 16), %rsp; /* reserve 16 * 16 bytes of stack */
+ andq $~31, %rsp; /* round %rsp down to a 32-byte boundary */
+ movq %rsp, %rax; /* %rax = aligned scratch area (presumably for callee) */
+
+ call __camellia_enc_blk16;
+
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ vzeroall; /* zero every vector register before returning */
+
+ leave; /* %rsp = %rbp, pop %rbp: drops the scratch area */
+ CFI_LEAVE();
+ ret_spec_stop;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ecb_enc,.-_gcry_camellia_aesni_avx_ecb_enc;)
+
+.align 16
+.globl _gcry_camellia_aesni_avx_ecb_dec
+ELF(.type _gcry_camellia_aesni_avx_ecb_dec,@function;)
+
+_gcry_camellia_aesni_avx_ecb_dec:
+ /* ECB-decrypt one batch of 16 blocks. input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp; /* %rbp keeps the entry %rsp; undone by 'leave' */
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ cmpl $128, key_bitlength(CTX); /* flags: is this a 128-bit key? */
+ movl $32, %r8d; /* default; movl leaves the flags untouched */
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max: 24 for 128-bit keys, else 32 */
+
+ inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rdx, (key_table)(CTX, %r8, 8)); /* key_table entry at index max (8-byte stride) */
+
+ subq $(16 * 16), %rsp; /* reserve 16 * 16 bytes of stack */
+ andq $~31, %rsp; /* round %rsp down to a 32-byte boundary */
+ movq %rsp, %rax; /* %rax = aligned scratch area (presumably for callee) */
+
+ call __camellia_dec_blk16;
+
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ vzeroall; /* zero every vector register before returning */
+
+ leave; /* %rsp = %rbp, pop %rbp: drops the scratch area */
+ CFI_LEAVE();
+ ret_spec_stop;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ecb_dec,.-_gcry_camellia_aesni_avx_ecb_dec;)
+
.align 16
.globl _gcry_camellia_aesni_avx_cbc_dec
ELF(.type _gcry_camellia_aesni_avx_cbc_dec,@function;)
diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 2e00f563..8b4b4b3c 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -172,15 +172,25 @@ extern void _gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx,
const u64 Ls[16]) ASM_FUNC_ABI;
extern void _gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx,
- const unsigned char *abuf,
- unsigned char *offset,
- unsigned char *checksum,
- const u64 Ls[16]) ASM_FUNC_ABI;
+ const unsigned char *abuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
const unsigned char *key,
unsigned int keylen) ASM_FUNC_ABI;
+extern void _gcry_camellia_aesni_avx_ecb_enc(const CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in)
+ ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ecb_dec(const CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in)
+ ASM_FUNC_ABI;
+
static const int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 16 +
2 * sizeof(void *) + ASM_EXTRA_STACK;
@@ -473,18 +483,9 @@ camellia_setkey(void *c, const byte *key, unsigned keylen,
bulk_ops->ctr_enc = _gcry_camellia_ctr_enc;
bulk_ops->ocb_crypt = _gcry_camellia_ocb_crypt;
bulk_ops->ocb_auth = _gcry_camellia_ocb_auth;
-#ifdef USE_AESNI_AVX2
- if (ctx->use_aesni_avx2 || ctx->use_vaes_avx2 || ctx->use_gfni_avx2)
- {
- bulk_ops->xts_crypt = _gcry_camellia_xts_crypt;
- bulk_ops->ecb_crypt = _gcry_camellia_ecb_crypt;
- bulk_ops->ctr32le_enc = _gcry_camellia_ctr32le_enc;
- }
-#else
- (void)_gcry_camellia_xts_crypt;
- (void)_gcry_camellia_ecb_crypt;
- (void)_gcry_camellia_ctr32le_enc;
-#endif
+ bulk_ops->xts_crypt = _gcry_camellia_xts_crypt;
+ bulk_ops->ecb_crypt = _gcry_camellia_ecb_crypt;
+ bulk_ops->ctr32le_enc = _gcry_camellia_ctr32le_enc;
if (0)
{ }
@@ -651,10 +652,21 @@ camellia_encrypt_blk1_32 (void *priv, byte *outbuf, const byte *inbuf,
return avx2_burn_stack_depth;
}
#endif
+#ifdef USE_AESNI_AVX
+ while (ctx->use_aesni_avx && num_blks >= 16)
+ {
+ _gcry_camellia_aesni_avx_ecb_enc (ctx, outbuf, inbuf);
+ stack_burn_size = avx_burn_stack_depth;
+ outbuf += CAMELLIA_BLOCK_SIZE * 16;
+ inbuf += CAMELLIA_BLOCK_SIZE * 16;
+ num_blks -= 16;
+ }
+#endif
while (num_blks)
{
- stack_burn_size = camellia_encrypt((void *)ctx, outbuf, inbuf);
+ unsigned int nburn = camellia_encrypt((void *)ctx, outbuf, inbuf);
+ stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size;
outbuf += CAMELLIA_BLOCK_SIZE;
inbuf += CAMELLIA_BLOCK_SIZE;
num_blks--;
@@ -731,10 +743,21 @@ camellia_decrypt_blk1_32 (void *priv, byte *outbuf, const byte *inbuf,
return avx2_burn_stack_depth;
}
#endif
+#ifdef USE_AESNI_AVX
+ while (ctx->use_aesni_avx && num_blks >= 16)
+ {
+ _gcry_camellia_aesni_avx_ecb_dec (ctx, outbuf, inbuf);
+ stack_burn_size = avx_burn_stack_depth;
+ outbuf += CAMELLIA_BLOCK_SIZE * 16;
+ inbuf += CAMELLIA_BLOCK_SIZE * 16;
+ num_blks -= 16;
+ }
+#endif
while (num_blks)
{
- stack_burn_size = camellia_decrypt((void *)ctx, outbuf, inbuf);
+ unsigned int nburn = camellia_decrypt((void *)ctx, outbuf, inbuf);
+ stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size;
outbuf += CAMELLIA_BLOCK_SIZE;
inbuf += CAMELLIA_BLOCK_SIZE;
num_blks--;
--
2.37.2
More information about the
Gcrypt-devel mailing list