[PATCH 06/10] Add parallelized AES-NI ECB decryption
    Jussi Kivilinna 
    jussi.kivilinna at mbnet.fi
       
    Fri Nov 23 18:22:20 CET 2012
    
    
  
* cipher/cipher-internal.h (struct gcry_cipher_handle): Add
bulk.ecb_dec.
* cipher/cipher.c (gcry_cipher_open) [USE_AES]: Set bulk.ecb_dec
to _gcry_aes_ecb_dec.
(do_ecb_decrypt): Redirect call into bulk.ecb_dec if non-null.
* src/cipher.h (_gcry_aes_ecb_dec): Add new function prototype.
* cipher/rijndael.c (_gcry_aes_ecb_dec): Add new function.
--
Parallelized ECB decryption is ~2.0x faster on Intel Sandy-Bridge (x86-64).
Before:
$ tests/benchmark --cipher-repetitions 1000 cipher aes aes192 aes256
Running each test 1000 times.
                ECB/Stream         CBC             CFB             OFB             CTR
             --------------- --------------- --------------- --------------- ---------------
AES            670ms   770ms  2130ms   450ms  1880ms   670ms  2250ms  2280ms   490ms   490ms
AES192         880ms   920ms  2460ms   540ms  2210ms   830ms  2580ms  2570ms   580ms   570ms
AES256        1020ms  1070ms  2800ms   620ms  2560ms   970ms  2880ms  2880ms   660ms   650ms
After:
Running each test 1000 times.
                ECB/Stream         CBC             CFB             OFB             CTR
             --------------- --------------- --------------- --------------- ---------------
AES            690ms   350ms  2130ms   470ms  1890ms   670ms  2220ms  2240ms   490ms   490ms
AES192         900ms   440ms  2460ms   560ms  2210ms   840ms  2550ms  2560ms   570ms   570ms
AES256        1040ms   520ms  2800ms   640ms  2550ms   970ms  2840ms  2850ms   660ms   650ms
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at mbnet.fi>
---
 cipher/cipher-internal.h |    3 ++
 cipher/cipher.c          |    8 +++++
 cipher/rijndael.c        |   70 ++++++++++++++++++++++++++++++++++++++++++++++
 src/cipher.h             |    2 +
 4 files changed, 83 insertions(+)
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 025bf2e..dcce708 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -89,6 +89,9 @@ struct gcry_cipher_handle
     void (*ctr_enc)(void *context, unsigned char *iv,
                     void *outbuf_arg, const void *inbuf_arg,
                     unsigned int nblocks);
+    void (*ecb_dec)(void *context, void *outbuf_arg,
+                    const void *inbuf_arg,
+                    unsigned int nblocks);
   } bulk;
 
 
diff --git a/cipher/cipher.c b/cipher/cipher.c
index 389bf7a..b0f9773 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -716,6 +716,7 @@ gcry_cipher_open (gcry_cipher_hd_t *handle,
               h->bulk.cbc_enc = _gcry_aes_cbc_enc;
               h->bulk.cbc_dec = _gcry_aes_cbc_dec;
               h->bulk.ctr_enc = _gcry_aes_ctr_enc;
+              h->bulk.ecb_dec = _gcry_aes_ecb_dec;
               break;
 #endif /*USE_AES*/
 
@@ -881,6 +882,13 @@ do_ecb_decrypt (gcry_cipher_hd_t c,
     return GPG_ERR_INV_LENGTH;
   nblocks = inbuflen / c->cipher->blocksize;
 
+  if (nblocks && c->bulk.ecb_dec)
+    {
+      c->bulk.ecb_dec (&c->context.c, outbuf, inbuf, nblocks);
+
+      return 0;
+    }
+
   for (n=0; n < nblocks; n++ )
     {
       c->cipher->decrypt (&c->context.c, outbuf, (byte*)/*arggg*/inbuf );
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 34a0f8c..421b159 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -1838,6 +1838,76 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv,
 }
 
 
+/* Bulk ECB decryption: decrypt NBLOCKS complete blocks from INBUF_ARG into
+ * OUTBUF_ARG.  Only intended for the bulk encryption feature of cipher.c.  */
+void
+_gcry_aes_ecb_dec (void *context, void *outbuf_arg,
+		   const void *inbuf_arg, unsigned int nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+
+  if (0)
+    ; /* Empty head so the #ifdef'ed branch can be an "else if".  */
+#ifdef USE_AESNI
+  else if (ctx->use_aesni)
+    {
+      aesni_prepare ();
+
+      if (!ctx->decryption_prepared ) /* Skipped once the flag is set.  */
+        {
+          prepare_decryption ( ctx );
+          ctx->decryption_prepared = 1;
+        }
+
+      for ( ;nblocks > 3 ; nblocks -= 4 ) /* Four blocks per pass.  */
+        {
+          asm volatile
+            ("movdqu 0*16(%[inbuf]), %%xmm1\n\t" /* load input blocks */
+             "movdqu 1*16(%[inbuf]), %%xmm2\n\t"
+             "movdqu 2*16(%[inbuf]), %%xmm3\n\t"
+             "movdqu 3*16(%[inbuf]), %%xmm4\n\t"
+             : /* No output */
+             : [inbuf] "r" (inbuf)
+             : "memory");
+
+          do_aesni_dec_vec4 (ctx); /* Presumably decrypts xmm1..xmm4 in place.  */
+
+          asm volatile
+            ("movdqu %%xmm1, 0*16(%[outbuf])\n\t" /* store output blocks */
+             "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+             "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+             "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+             : /* No output */
+             : [outbuf] "r" (outbuf)
+             : "memory");
+          
+          outbuf += 4*BLOCKSIZE;
+          inbuf  += 4*BLOCKSIZE;
+        }
+
+      for ( ;nblocks; nblocks-- ) /* Remaining 0..3 blocks, one by one.  */
+        { /* NOTE(review): buffers may be unaligned; confirm helper copes.  */
+          do_aesni_dec_aligned (ctx, outbuf, inbuf);
+
+          inbuf += BLOCKSIZE;
+          outbuf += BLOCKSIZE;
+        }
+
+      aesni_cleanup ();
+      aesni_cleanup_2_5 ();
+    }
+#endif
+  else /* Generic path: one block at a time via rijndael_decrypt.  */
+    for ( ;nblocks; nblocks-- )
+      {
+        rijndael_decrypt(context, outbuf, inbuf);
+        inbuf += BLOCKSIZE;
+        outbuf += BLOCKSIZE;
+      }
+}
+
 
 
 /* Run the self-tests for AES 128.  Returns NULL on success. */
diff --git a/src/cipher.h b/src/cipher.h
index 48eeeda..6b34e90 100644
--- a/src/cipher.h
+++ b/src/cipher.h
@@ -94,6 +94,8 @@ void _gcry_aes_cbc_dec (void *context, unsigned char *iv,
 void _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
                         void *outbuf_arg, const void *inbuf_arg,
                         unsigned int nblocks);
+void _gcry_aes_ecb_dec (void *context, void *outbuf_arg,
+                        const void *inbuf_arg, unsigned int nblocks);
 
 
 /*-- dsa.c --*/
    
    
More information about the Gcrypt-devel
mailing list