[PATCH] rijndael-aesni: interleave last CTR encryption round with xoring
Jussi Kivilinna
jussi.kivilinna at iki.fi
Sat Dec 1 12:39:21 CET 2018
* cipher/rijndael-aesni.c (do_aesni_ctr_8): Interleave aesenclast
with input xoring.
--
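The structure of the 'aesenclast' instruction allows the last
encryption round to be merged with the xoring of the input block:
the input block is first xored with the last round key and the
result is then passed to 'aesenclast' as its round-key operand.
This gives a small (~0.5%) improvement in performance.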
Intel i7-4790K @ 4.0 GHz:
AES | nanosecs/byte mebibytes/sec cycles/byte
CTR enc | 0.159 ns/B 6002 MiB/s 0.636 c/B
CTR dec | 0.159 ns/B 6001 MiB/s 0.636 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/rijndael-aesni.c | 34 +++++++++++++++++-----------------
1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 483387cde..ec9f4d4a5 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -1657,14 +1657,6 @@ do_aesni_ctr_8 (const RIJNDAEL_context *ctx,
"movdqa 0xe0(%[key]), %%xmm1\n"
".Lenclast%=:\n\t"
- "aesenclast %%xmm1, %%xmm0\n\t"
- "aesenclast %%xmm1, %%xmm2\n\t"
- "aesenclast %%xmm1, %%xmm3\n\t"
- "aesenclast %%xmm1, %%xmm4\n\t"
- "aesenclast %%xmm1, %%xmm8\n\t"
- "aesenclast %%xmm1, %%xmm9\n\t"
- "aesenclast %%xmm1, %%xmm10\n\t"
- "aesenclast %%xmm1, %%xmm11\n\t"
:
: [key] "r" (ctx->keyschenc),
[rounds] "r" (ctx->rounds)
@@ -1674,22 +1666,30 @@ do_aesni_ctr_8 (const RIJNDAEL_context *ctx,
"movdqu 1*16(%[src]), %%xmm13\n\t" /* Get block 2. */
"movdqu 2*16(%[src]), %%xmm14\n\t" /* Get block 3. */
"movdqu 3*16(%[src]), %%xmm15\n\t" /* Get block 4. */
- "movdqu 4*16(%[src]), %%xmm1\n\t" /* Get block 5. */
- "pxor %%xmm12, %%xmm0\n\t" /* EncCTR-1 ^= input */
+ "movdqu 4*16(%[src]), %%xmm7\n\t" /* Get block 5. */
+ "pxor %%xmm1, %%xmm12\n\t" /* block1 ^= lastkey */
+ "aesenclast %%xmm12, %%xmm0\n\t"
"movdqu 5*16(%[src]), %%xmm12\n\t" /* Get block 6. */
- "pxor %%xmm13, %%xmm2\n\t" /* EncCTR-2 ^= input */
+ "pxor %%xmm1, %%xmm13\n\t" /* block2 ^= lastkey */
+ "aesenclast %%xmm13, %%xmm2\n\t"
"movdqu 6*16(%[src]), %%xmm13\n\t" /* Get block 7. */
- "pxor %%xmm14, %%xmm3\n\t" /* EncCTR-3 ^= input */
+ "pxor %%xmm1, %%xmm14\n\t" /* block3 ^= lastkey */
+ "aesenclast %%xmm14, %%xmm3\n\t"
"movdqu 7*16(%[src]), %%xmm14\n\t" /* Get block 8. */
- "pxor %%xmm15, %%xmm4\n\t" /* EncCTR-4 ^= input */
+ "pxor %%xmm1, %%xmm15\n\t" /* block4 ^= lastkey */
+ "aesenclast %%xmm15, %%xmm4\n\t"
"movdqu %%xmm0, 0*16(%[dst])\n\t" /* Store block 1 */
- "pxor %%xmm1, %%xmm8\n\t" /* EncCTR-5 ^= input */
+ "pxor %%xmm1, %%xmm7\n\t" /* block5 ^= lastkey */
+ "aesenclast %%xmm7, %%xmm8\n\t"
"movdqu %%xmm0, 0*16(%[dst])\n\t" /* Store block 1 */
- "pxor %%xmm12, %%xmm9\n\t" /* EncCTR-6 ^= input */
+ "pxor %%xmm1, %%xmm12\n\t" /* block6 ^= lastkey */
+ "aesenclast %%xmm12, %%xmm9\n\t"
"movdqu %%xmm2, 1*16(%[dst])\n\t" /* Store block 2. */
- "pxor %%xmm13, %%xmm10\n\t" /* EncCTR-7 ^= input */
+ "pxor %%xmm1, %%xmm13\n\t" /* block7 ^= lastkey */
+ "aesenclast %%xmm13, %%xmm10\n\t"
"movdqu %%xmm3, 2*16(%[dst])\n\t" /* Store block 3. */
- "pxor %%xmm14, %%xmm11\n\t" /* EncCTR-8 ^= input */
+ "pxor %%xmm1, %%xmm14\n\t" /* block8 ^= lastkey */
+ "aesenclast %%xmm14, %%xmm11\n\t"
"movdqu %%xmm4, 3*16(%[dst])\n\t" /* Store block 4. */
"movdqu %%xmm8, 4*16(%[dst])\n\t" /* Store block 8. */
"movdqu %%xmm9, 5*16(%[dst])\n\t" /* Store block 9. */
More information about the Gcrypt-devel
mailing list