[PATCH] Add size optimized cipher block copy and xor functions

Jussi Kivilinna jussi.kivilinna at iki.fi
Sat Jul 21 12:11:24 CEST 2018


* cipher/bufhelp.h (buf_get_he32, buf_put_he32, buf_get_he64)
(buf_put_he64): New.
* cipher/cipher-internal.h (cipher_block_cpy, cipher_block_xor)
(cipher_block_xor_1, cipher_block_xor_2dst, cipher_block_xor_n_copy_2)
(cipher_block_xor_n_copy): New.
* cipher/cipher-gcm-intel-pclmul.c
(_gcry_ghash_setup_intel_pclmul): Use assembly for swapping endianness
instead of buf_get_be64 and buf_cpy.
* cipher/blowfish.c: Use new cipher_block_* functions for cipher block
sized buf_cpy/xor* operations.
* cipher/camellia-glue.c: Ditto.
* cipher/cast5.c: Ditto.
* cipher/cipher-aeswrap.c: Ditto.
* cipher/cipher-cbc.c: Ditto.
* cipher/cipher-ccm.c: Ditto.
* cipher/cipher-cfb.c: Ditto.
* cipher/cipher-cmac.c: Ditto.
* cipher/cipher-ctr.c: Ditto.
* cipher/cipher-eax.c: Ditto.
* cipher/cipher-gcm.c: Ditto.
* cipher/cipher-ocb.c: Ditto.
* cipher/cipher-ofb.c: Ditto.
* cipher/cipher-xts.c: Ditto.
* cipher/des.c: Ditto.
* cipher/rijndael.c: Ditto.
* cipher/serpent.c: Ditto.
* cipher/twofish.c: Ditto.
--

This commit adds size-optimized functions for copying and xoring
cipher-block-sized buffers. These functions also allow GCC to use
inline auto-vectorization for block cipher copying and xoring at
higher optimization levels.
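
As an illustration of the idea (a minimal standalone sketch, not the
libgcrypt code itself: it uses memcpy-based 64-bit loads in place of the
internal buf_get_he64/buf_put_he64 helpers, and the name block16_xor is
hypothetical), a 16-byte block XOR expressed as two host-endian 64-bit
word operations is something the compiler can typically turn into a
single vector XOR at -O2/-O3:

  #include <stdint.h>
  #include <string.h>

  /* Hypothetical standalone equivalent of a 16-byte block XOR: load
     both inputs as two 64-bit words (memcpy sidesteps alignment and
     strict-aliasing concerns), XOR word-wise and store the result.  */
  static inline void
  block16_xor (void *dst, const void *src1, const void *src2)
  {
    uint64_t a[2], b[2];

    memcpy (a, src1, 16);
    memcpy (b, src2, 16);
    a[0] ^= b[0];
    a[1] ^= b[1];
    memcpy (dst, a, 16);
  }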

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/blowfish.c                |   7 +-
 cipher/bufhelp.h                 |  16 +++-
 cipher/camellia-glue.c           |   7 +-
 cipher/cast5.c                   |   7 +-
 cipher/cipher-aeswrap.c          |   4 +-
 cipher/cipher-cbc.c              |  15 ++--
 cipher/cipher-ccm.c              |   7 +-
 cipher/cipher-cfb.c              |  16 ++--
 cipher/cipher-cmac.c             |   8 +-
 cipher/cipher-ctr.c              |  50 +++++++----
 cipher/cipher-eax.c              |  11 +--
 cipher/cipher-gcm-intel-pclmul.c |  21 +++--
 cipher/cipher-gcm.c              |  14 +--
 cipher/cipher-internal.h         | 141 +++++++++++++++++++++++++++++++
 cipher/cipher-ocb.c              |  81 +++++++++---------
 cipher/cipher-ofb.c              |   2 +-
 cipher/cipher-xts.c              |  14 +--
 cipher/des.c                     |   7 +-
 cipher/rijndael.c                |  43 +++++-----
 cipher/serpent.c                 |   7 +-
 cipher/twofish.c                 |   6 +-
 21 files changed, 335 insertions(+), 149 deletions(-)

diff --git a/cipher/blowfish.c b/cipher/blowfish.c
index 2d9182009..f032c5c6f 100644
--- a/cipher/blowfish.c
+++ b/cipher/blowfish.c
@@ -37,6 +37,7 @@
 #include "g10lib.h"
 #include "cipher.h"
 #include "bufhelp.h"
+#include "cipher-internal.h"
 #include "cipher-selftest.h"
 
 #define BLOWFISH_BLOCKSIZE 8
@@ -660,7 +661,7 @@ _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
       /* Encrypt the counter. */
       do_encrypt_block(ctx, tmpbuf, ctr);
       /* XOR the input with the encrypted counter and store in output.  */
-      buf_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE);
+      cipher_block_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE);
       outbuf += BLOWFISH_BLOCKSIZE;
       inbuf  += BLOWFISH_BLOCKSIZE;
       /* Increment the counter.  */
@@ -728,7 +729,7 @@ _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
          the intermediate result to SAVEBUF.  */
       do_decrypt_block (ctx, savebuf, inbuf);
 
-      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
       inbuf += BLOWFISH_BLOCKSIZE;
       outbuf += BLOWFISH_BLOCKSIZE;
     }
@@ -785,7 +786,7 @@ _gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
   for ( ;nblocks; nblocks-- )
     {
       do_encrypt_block(ctx, iv, iv);
-      buf_xor_n_copy(outbuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
       outbuf += BLOWFISH_BLOCKSIZE;
       inbuf  += BLOWFISH_BLOCKSIZE;
     }
diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h
index 83d3f53aa..4e97c4d41 100644
--- a/cipher/bufhelp.h
+++ b/cipher/bufhelp.h
@@ -450,7 +450,21 @@ static inline void buf_put_le64(void *_buf, u64 val)
   out->a = le_bswap64(val);
 }
 
-
 #endif /*BUFHELP_UNALIGNED_ACCESS*/
 
+
+/* Host-endian get/put macros */
+#ifdef WORDS_BIGENDIAN
+# define buf_get_he32 buf_get_be32
+# define buf_put_he32 buf_put_be32
+# define buf_get_he64 buf_get_be64
+# define buf_put_he64 buf_put_be64
+#else
+# define buf_get_he32 buf_get_le32
+# define buf_put_he32 buf_put_le32
+# define buf_get_he64 buf_get_le64
+# define buf_put_he64 buf_put_le64
+#endif
+
+
 #endif /*GCRYPT_BUFHELP_H*/
diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 22df21469..69b240b79 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -430,7 +430,7 @@ _gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
       /* Encrypt the counter. */
       Camellia_EncryptBlock(ctx->keybitlength, ctr, ctx->keytable, tmpbuf);
       /* XOR the input with the encrypted counter and store in output.  */
-      buf_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE);
+      cipher_block_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE);
       outbuf += CAMELLIA_BLOCK_SIZE;
       inbuf  += CAMELLIA_BLOCK_SIZE;
       /* Increment the counter.  */
@@ -523,7 +523,8 @@ _gcry_camellia_cbc_dec(void *context, unsigned char *iv,
          the intermediate result to SAVEBUF.  */
       Camellia_DecryptBlock(ctx->keybitlength, inbuf, ctx->keytable, savebuf);
 
-      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAMELLIA_BLOCK_SIZE);
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf,
+                                CAMELLIA_BLOCK_SIZE);
       inbuf += CAMELLIA_BLOCK_SIZE;
       outbuf += CAMELLIA_BLOCK_SIZE;
     }
@@ -605,7 +606,7 @@ _gcry_camellia_cfb_dec(void *context, unsigned char *iv,
   for ( ;nblocks; nblocks-- )
     {
       Camellia_EncryptBlock(ctx->keybitlength, iv, ctx->keytable, iv);
-      buf_xor_n_copy(outbuf, iv, inbuf, CAMELLIA_BLOCK_SIZE);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, CAMELLIA_BLOCK_SIZE);
       outbuf += CAMELLIA_BLOCK_SIZE;
       inbuf  += CAMELLIA_BLOCK_SIZE;
     }
diff --git a/cipher/cast5.c b/cipher/cast5.c
index e7d324b25..49e8b781b 100644
--- a/cipher/cast5.c
+++ b/cipher/cast5.c
@@ -44,6 +44,7 @@
 #include "cipher.h"
 #include "bithelp.h"
 #include "bufhelp.h"
+#include "cipher-internal.h"
 #include "cipher-selftest.h"
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
@@ -634,7 +635,7 @@ _gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
       /* Encrypt the counter. */
       do_encrypt_block(ctx, tmpbuf, ctr);
       /* XOR the input with the encrypted counter and store in output.  */
-      buf_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE);
+      cipher_block_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE);
       outbuf += CAST5_BLOCKSIZE;
       inbuf  += CAST5_BLOCKSIZE;
       /* Increment the counter.  */
@@ -702,7 +703,7 @@ _gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
          the intermediate result to SAVEBUF.  */
       do_decrypt_block (ctx, savebuf, inbuf);
 
-      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAST5_BLOCKSIZE);
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAST5_BLOCKSIZE);
       inbuf += CAST5_BLOCKSIZE;
       outbuf += CAST5_BLOCKSIZE;
     }
@@ -758,7 +759,7 @@ _gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
   for ( ;nblocks; nblocks-- )
     {
       do_encrypt_block(ctx, iv, iv);
-      buf_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE);
       outbuf += CAST5_BLOCKSIZE;
       inbuf  += CAST5_BLOCKSIZE;
     }
diff --git a/cipher/cipher-aeswrap.c b/cipher/cipher-aeswrap.c
index a8d0e03ee..c182657e1 100644
--- a/cipher/cipher-aeswrap.c
+++ b/cipher/cipher-aeswrap.c
@@ -99,7 +99,7 @@ _gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c,
 		break;
 	    }
           /* A := MSB_64(B) ^ t */
-	  buf_xor(a, b, t, 8);
+	  cipher_block_xor(a, b, t, 8);
           /* R[i] := LSB_64(B) */
           memcpy (r+i*8, b+8, 8);
         }
@@ -170,7 +170,7 @@ _gcry_cipher_aeswrap_decrypt (gcry_cipher_hd_t c,
       for (i = n; i >= 1; i--)
         {
           /* B := AES_k^1( (A ^ t)| R[i] ) */
-	  buf_xor(b, a, t, 8);
+	  cipher_block_xor(b, a, t, 8);
           memcpy (b+8, r+(i-1)*8, 8);
           nburn = c->spec->decrypt (&c->context.c, b, b);
           burn = nburn > burn ? nburn : burn;
diff --git a/cipher/cipher-cbc.c b/cipher/cipher-cbc.c
index 2ad39d09d..d4df1e72a 100644
--- a/cipher/cipher-cbc.c
+++ b/cipher/cipher-cbc.c
@@ -56,7 +56,7 @@ cbc_encrypt_inner(gcry_cipher_hd_t c, unsigned char *outbuf,
 
       for (n=0; n < nblocks; n++ )
         {
-          buf_xor (outbuf, inbuf, ivp, blocksize);
+          cipher_block_xor (outbuf, inbuf, ivp, blocksize);
           nburn = enc_fn ( &c->context.c, outbuf, outbuf );
           burn = nburn > burn ? nburn : burn;
           ivp = outbuf;
@@ -66,7 +66,7 @@ cbc_encrypt_inner(gcry_cipher_hd_t c, unsigned char *outbuf,
         }
 
       if (ivp != c->u_iv.iv)
-        buf_cpy (c->u_iv.iv, ivp, blocksize);
+        cipher_block_cpy (c->u_iv.iv, ivp, blocksize);
     }
 
   return burn;
@@ -156,7 +156,7 @@ _gcry_cipher_cbc_cts_encrypt (gcry_cipher_hd_t c,
 
       nburn = enc_fn (&c->context.c, outbuf, outbuf);
       burn = nburn > burn ? nburn : burn;
-      buf_cpy (c->u_iv.iv, outbuf, blocksize);
+      cipher_block_cpy (c->u_iv.iv, outbuf, blocksize);
     }
 
   if (burn > 0)
@@ -190,7 +190,8 @@ cbc_decrypt_inner(gcry_cipher_hd_t c, unsigned char *outbuf,
              storage here because it is not used otherwise.  */
           nburn = dec_fn ( &c->context.c, c->lastiv, inbuf );
           burn = nburn > burn ? nburn : burn;
-          buf_xor_n_copy_2 (outbuf, c->lastiv, c->u_iv.iv, inbuf, blocksize);
+          cipher_block_xor_n_copy_2 (outbuf, c->lastiv, c->u_iv.iv, inbuf,
+                                     blocksize);
           inbuf  += blocksize;
           outbuf += blocksize;
         }
@@ -252,7 +253,7 @@ _gcry_cipher_cbc_cts_decrypt (gcry_cipher_hd_t c,
       nblocks--;
       if ((inbuflen & blocksize_mask) == 0)
 	nblocks--;
-      buf_cpy (c->lastiv, c->u_iv.iv, blocksize);
+      cipher_block_cpy (c->lastiv, c->u_iv.iv, blocksize);
     }
 
   burn = cbc_decrypt_inner(c, outbuf, inbuf, nblocks, blocksize);
@@ -268,7 +269,7 @@ _gcry_cipher_cbc_cts_decrypt (gcry_cipher_hd_t c,
       else
         restbytes = inbuflen & blocksize_mask;
 
-      buf_cpy (c->lastiv, c->u_iv.iv, blocksize );         /* Save Cn-2. */
+      cipher_block_cpy (c->lastiv, c->u_iv.iv, blocksize ); /* Save Cn-2. */
       buf_cpy (c->u_iv.iv, inbuf + blocksize, restbytes ); /* Save Cn. */
 
       nburn = dec_fn ( &c->context.c, outbuf, inbuf );
@@ -280,7 +281,7 @@ _gcry_cipher_cbc_cts_decrypt (gcry_cipher_hd_t c,
         c->u_iv.iv[i] = outbuf[i];
       nburn = dec_fn (&c->context.c, outbuf, c->u_iv.iv);
       burn = nburn > burn ? nburn : burn;
-      buf_xor(outbuf, outbuf, c->lastiv, blocksize);
+      cipher_block_xor(outbuf, outbuf, c->lastiv, blocksize);
       /* c->lastiv is now really lastlastiv, does this matter? */
     }
 
diff --git a/cipher/cipher-ccm.c b/cipher/cipher-ccm.c
index d7f14d890..e71c6f159 100644
--- a/cipher/cipher-ccm.c
+++ b/cipher/cipher-ccm.c
@@ -67,7 +67,8 @@ do_cbc_mac (gcry_cipher_hd_t c, const unsigned char *inbuf, size_t inlen,
       if (unused > 0)
         {
           /* Process one block from macbuf.  */
-          buf_xor(c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.macbuf, blocksize);
+          cipher_block_xor(c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.macbuf,
+                           blocksize);
           set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ));
 
           unused = 0;
@@ -86,7 +87,7 @@ do_cbc_mac (gcry_cipher_hd_t c, const unsigned char *inbuf, size_t inlen,
         {
           while (inlen >= blocksize)
             {
-              buf_xor(c->u_iv.iv, c->u_iv.iv, inbuf, blocksize);
+              cipher_block_xor(c->u_iv.iv, c->u_iv.iv, inbuf, blocksize);
 
               set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ));
 
@@ -272,7 +273,7 @@ _gcry_cipher_ccm_tag (gcry_cipher_hd_t c, unsigned char *outbuf,
       burn = do_cbc_mac (c, NULL, 0, 1); /* Perform final padding.  */
 
       /* Add S_0 */
-      buf_xor (c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.s0, 16);
+      cipher_block_xor (c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.s0, 16);
 
       wipememory (c->u_ctr.ctr, 16);
       wipememory (c->u_mode.ccm.s0, 16);
diff --git a/cipher/cipher-cfb.c b/cipher/cipher-cfb.c
index 7f00aee5c..012c6c13c 100644
--- a/cipher/cipher-cfb.c
+++ b/cipher/cipher-cfb.c
@@ -87,7 +87,7 @@ _gcry_cipher_cfb_encrypt (gcry_cipher_hd_t c,
           nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
           burn = nburn > burn ? nburn : burn;
           /* XOR the input with the IV and store input into IV.  */
-          buf_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize);
+          cipher_block_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize);
           outbuf += blocksize;
           inbuf += blocksize;
           inbuflen -= blocksize;
@@ -97,11 +97,11 @@ _gcry_cipher_cfb_encrypt (gcry_cipher_hd_t c,
   if ( inbuflen >= blocksize )
     {
       /* Save the current IV and then encrypt the IV. */
-      buf_cpy( c->lastiv, c->u_iv.iv, blocksize );
+      cipher_block_cpy( c->lastiv, c->u_iv.iv, blocksize );
       nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
       burn = nburn > burn ? nburn : burn;
       /* XOR the input with the IV and store input into IV */
-      buf_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize);
+      cipher_block_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize);
       outbuf += blocksize;
       inbuf += blocksize;
       inbuflen -= blocksize;
@@ -109,7 +109,7 @@ _gcry_cipher_cfb_encrypt (gcry_cipher_hd_t c,
   if ( inbuflen )
     {
       /* Save the current IV and then encrypt the IV. */
-      buf_cpy( c->lastiv, c->u_iv.iv, blocksize );
+      cipher_block_cpy( c->lastiv, c->u_iv.iv, blocksize );
       nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
       burn = nburn > burn ? nburn : burn;
       c->unused = blocksize;
@@ -185,7 +185,7 @@ _gcry_cipher_cfb_decrypt (gcry_cipher_hd_t c,
           nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
           burn = nburn > burn ? nburn : burn;
           /* XOR the input with the IV and store input into IV. */
-          buf_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize);
+          cipher_block_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize);
           outbuf += blocksize;
           inbuf += blocksize;
           inbuflen -= blocksize;
@@ -195,11 +195,11 @@ _gcry_cipher_cfb_decrypt (gcry_cipher_hd_t c,
   if (inbuflen >= blocksize )
     {
       /* Save the current IV and then encrypt the IV. */
-      buf_cpy ( c->lastiv, c->u_iv.iv, blocksize);
+      cipher_block_cpy ( c->lastiv, c->u_iv.iv, blocksize);
       nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
       burn = nburn > burn ? nburn : burn;
       /* XOR the input with the IV and store input into IV */
-      buf_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize);
+      cipher_block_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize);
       outbuf += blocksize;
       inbuf += blocksize;
       inbuflen -= blocksize;
@@ -208,7 +208,7 @@ _gcry_cipher_cfb_decrypt (gcry_cipher_hd_t c,
   if (inbuflen)
     {
       /* Save the current IV and then encrypt the IV. */
-      buf_cpy ( c->lastiv, c->u_iv.iv, blocksize );
+      cipher_block_cpy ( c->lastiv, c->u_iv.iv, blocksize );
       nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
       burn = nburn > burn ? nburn : burn;
       c->unused = blocksize;
diff --git a/cipher/cipher-cmac.c b/cipher/cipher-cmac.c
index 321ab9eab..da550c372 100644
--- a/cipher/cipher-cmac.c
+++ b/cipher/cipher-cmac.c
@@ -66,7 +66,7 @@ _gcry_cmac_write (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx,
       for (; inlen && ctx->mac_unused < blocksize; inlen--)
         ctx->macbuf[ctx->mac_unused++] = *inbuf++;
 
-      buf_xor (ctx->u_iv.iv, ctx->u_iv.iv, ctx->macbuf, blocksize);
+      cipher_block_xor (ctx->u_iv.iv, ctx->u_iv.iv, ctx->macbuf, blocksize);
       set_burn (burn, enc_fn (&c->context.c, ctx->u_iv.iv, ctx->u_iv.iv));
 
       ctx->mac_unused = 0;
@@ -86,7 +86,7 @@ _gcry_cmac_write (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx,
   else
     while (inlen > blocksize)
       {
-        buf_xor (ctx->u_iv.iv, ctx->u_iv.iv, inbuf, blocksize);
+        cipher_block_xor (ctx->u_iv.iv, ctx->u_iv.iv, inbuf, blocksize);
         set_burn (burn, enc_fn (&c->context.c, ctx->u_iv.iv, ctx->u_iv.iv));
         inlen -= blocksize;
         inbuf += blocksize;
@@ -181,9 +181,9 @@ _gcry_cmac_final (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx)
         ctx->macbuf[count++] = 0;
     }
 
-  buf_xor (ctx->macbuf, ctx->macbuf, subkey, blocksize);
+  cipher_block_xor (ctx->macbuf, ctx->macbuf, subkey, blocksize);
 
-  buf_xor (ctx->u_iv.iv, ctx->u_iv.iv, ctx->macbuf, blocksize);
+  cipher_block_xor (ctx->u_iv.iv, ctx->u_iv.iv, ctx->macbuf, blocksize);
   burn = c->spec->encrypt (&c->context.c, ctx->u_iv.iv, ctx->u_iv.iv);
   if (burn)
     _gcry_burn_stack (burn + 4 * sizeof (void *));
diff --git a/cipher/cipher-ctr.c b/cipher/cipher-ctr.c
index b54fb5a73..2df0b8c3e 100644
--- a/cipher/cipher-ctr.c
+++ b/cipher/cipher-ctr.c
@@ -77,24 +77,38 @@ _gcry_cipher_ctr_encrypt (gcry_cipher_hd_t c,
     {
       unsigned char tmp[MAX_BLOCKSIZE];
 
-      do {
-        nburn = enc_fn (&c->context.c, tmp, c->u_ctr.ctr);
-        burn = nburn > burn ? nburn : burn;
-
-        for (i = blocksize; i > 0; i--)
-          {
-            c->u_ctr.ctr[i-1]++;
-            if (c->u_ctr.ctr[i-1] != 0)
-              break;
-          }
-
-        n = blocksize < inbuflen ? blocksize : inbuflen;
-        buf_xor(outbuf, inbuf, tmp, n);
-
-        inbuflen -= n;
-        outbuf += n;
-        inbuf += n;
-      } while (inbuflen);
+      do
+        {
+          nburn = enc_fn (&c->context.c, tmp, c->u_ctr.ctr);
+          burn = nburn > burn ? nburn : burn;
+
+          for (i = blocksize; i > 0; i--)
+            {
+              c->u_ctr.ctr[i-1]++;
+              if (c->u_ctr.ctr[i-1] != 0)
+                break;
+            }
+
+          if (inbuflen < blocksize)
+            break;
+          n = blocksize;
+          cipher_block_xor(outbuf, inbuf, tmp, blocksize);
+
+          inbuflen -= n;
+          outbuf += n;
+          inbuf += n;
+        }
+      while (inbuflen);
+
+      if (inbuflen)
+        {
+          n = inbuflen;
+          buf_xor(outbuf, inbuf, tmp, inbuflen);
+
+          inbuflen -= n;
+          outbuf += n;
+          inbuf += n;
+        }
 
       /* Save the unused bytes of the counter.  */
       c->unused = blocksize - n;
diff --git a/cipher/cipher-eax.c b/cipher/cipher-eax.c
index 1ce479755..3b17bb648 100644
--- a/cipher/cipher-eax.c
+++ b/cipher/cipher-eax.c
@@ -157,8 +157,8 @@ _gcry_cipher_eax_set_nonce (gcry_cipher_hd_t c, const byte *nonce,
   if (err != 0)
     return err;
 
-  buf_cpy (c->u_iv.iv, nonce_cmac.u_iv.iv, MAX_BLOCKSIZE);
-  buf_cpy (c->u_ctr.ctr, nonce_cmac.u_iv.iv, MAX_BLOCKSIZE);
+  cipher_block_cpy (c->u_iv.iv, nonce_cmac.u_iv.iv, MAX_BLOCKSIZE);
+  cipher_block_cpy (c->u_ctr.ctr, nonce_cmac.u_iv.iv, MAX_BLOCKSIZE);
 
   wipememory (&nonce_cmac, sizeof(nonce_cmac));
 
@@ -201,9 +201,10 @@ _gcry_cipher_eax_tag (gcry_cipher_hd_t c,
       if (err != 0)
 	return err;
 
-      buf_xor_1 (c->u_iv.iv, c->u_mode.eax.cmac_header.u_iv.iv, MAX_BLOCKSIZE);
-      buf_xor_1 (c->u_iv.iv, c->u_mode.eax.cmac_ciphertext.u_iv.iv,
-		 MAX_BLOCKSIZE);
+      cipher_block_xor_1 (c->u_iv.iv, c->u_mode.eax.cmac_header.u_iv.iv,
+                          MAX_BLOCKSIZE);
+      cipher_block_xor_1 (c->u_iv.iv, c->u_mode.eax.cmac_ciphertext.u_iv.iv,
+                          MAX_BLOCKSIZE);
 
       _gcry_cmac_reset (&c->u_mode.eax.cmac_header);
       _gcry_cmac_reset (&c->u_mode.eax.cmac_ciphertext);
diff --git a/cipher/cipher-gcm-intel-pclmul.c b/cipher/cipher-gcm-intel-pclmul.c
index a32724975..0f26277a5 100644
--- a/cipher/cipher-gcm-intel-pclmul.c
+++ b/cipher/cipher-gcm-intel-pclmul.c
@@ -248,7 +248,8 @@ static inline void gfmul_pclmul_aggr4(void)
 void
 _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
 {
-  u64 tmp[2];
+  static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
 #if defined(__x86_64__) && defined(__WIN64__)
   char win64tmp[3 * 16];
 
@@ -262,15 +263,19 @@ _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
 #endif
 
   /* Swap endianness of hsub. */
-  tmp[0] = buf_get_be64(c->u_mode.gcm.u_ghash_key.key + 8);
-  tmp[1] = buf_get_be64(c->u_mode.gcm.u_ghash_key.key + 0);
-  buf_cpy (c->u_mode.gcm.u_ghash_key.key, tmp, GCRY_GCM_BLOCK_LEN);
+  asm volatile ("movdqu (%[key]), %%xmm0\n\t"
+                "pshufb %[be_mask], %%xmm0\n\t"
+                "movdqu %%xmm0, (%[key])\n\t"
+                :
+                : [key] "r" (c->u_mode.gcm.u_ghash_key.key),
+                  [be_mask] "m" (*be_mask)
+                : "memory");
 
 #ifdef __x86_64__
-  asm volatile ("movdqu %[h_1], %%xmm0\n\t"
-                "movdqa %%xmm0, %%xmm1\n\t"
+  asm volatile ("movdqa %%xmm0, %%xmm1\n\t"
                 :
-                : [h_1] "m" (*tmp));
+                :
+                : "memory");
 
   gfmul_pclmul (); /* H•H => H² */
 
@@ -324,8 +329,6 @@ _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
                 ::: "cc" );
 #endif
 #endif
-
-  wipememory (tmp, sizeof(tmp));
 }
 
 
diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c
index 6169d1427..32ec9fa06 100644
--- a/cipher/cipher-gcm.c
+++ b/cipher/cipher-gcm.c
@@ -150,7 +150,7 @@ do_ghash (unsigned char *result, const unsigned char *buf, const u64 *gcmM)
   u32 A;
   int i;
 
-  buf_xor (V, result, buf, 16);
+  cipher_block_xor (V, result, buf, 16);
   V[0] = be_bswap64 (V[0]);
   V[1] = be_bswap64 (V[1]);
 
@@ -259,7 +259,7 @@ do_ghash (unsigned char *result, const unsigned char *buf, const u32 *gcmM)
   u32 T[3];
   int i;
 
-  buf_xor (V, result, buf, 16); /* V is big-endian */
+  cipher_block_xor (V, result, buf, 16); /* V is big-endian */
 
   /* First round can be manually tweaked based on fact that 'tmp' is zero. */
   i = 15;
@@ -342,7 +342,7 @@ do_ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf)
 #else
   unsigned long T[4];
 
-  buf_xor (V, result, buf, 16);
+  cipher_block_xor (V, result, buf, 16);
   for (i = 0; i < 4; i++)
     {
       V[i] = (V[i] & 0x00ff00ff) << 8 | (V[i] & 0xff00ff00) >> 8;
@@ -358,7 +358,7 @@ do_ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf)
       for (j = 0x80; j; j >>= 1)
         {
           if (hsub[i] & j)
-            buf_xor (p, p, V, 16);
+            cipher_block_xor (p, p, V, 16);
           if (bshift (V))
             V[0] ^= 0xe1000000;
         }
@@ -598,7 +598,7 @@ gcm_ctr_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
                 }
 
               fix_ctr = 1;
-              buf_cpy(ctr_copy, c->u_ctr.ctr, GCRY_GCM_BLOCK_LEN);
+              cipher_block_cpy(ctr_copy, c->u_ctr.ctr, GCRY_GCM_BLOCK_LEN);
             }
         }
 
@@ -928,8 +928,8 @@ _gcry_cipher_gcm_tag (gcry_cipher_hd_t c,
       /* Add bitlengths to tag. */
       do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, (byte*)bitlengths,
                    GCRY_GCM_BLOCK_LEN, 1);
-      buf_xor (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.tagiv,
-               c->u_mode.gcm.u_tag.tag, GCRY_GCM_BLOCK_LEN);
+      cipher_block_xor (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.tagiv,
+                        c->u_mode.gcm.u_tag.tag, GCRY_GCM_BLOCK_LEN);
       c->marks.tag = 1;
 
       wipememory (bitlengths, sizeof (bitlengths));
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 6d87561de..f93363b43 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -603,4 +603,145 @@ static inline unsigned int _gcry_blocksize_shift(gcry_cipher_hd_t c)
 }
 
 
+/* Optimized function for cipher block copying */
+static inline void
+cipher_block_cpy(void *_dst, const void *_src, size_t blocksize)
+{
+  byte *dst = _dst;
+  const byte *src = _src;
+  u64 s[2];
+
+  if (blocksize == 8)
+    {
+      buf_put_he64(dst + 0, buf_get_he64(src + 0));
+    }
+  else /* blocksize == 16 */
+    {
+      s[0] = buf_get_he64(src + 0);
+      s[1] = buf_get_he64(src + 8);
+      buf_put_he64(dst + 0, s[0]);
+      buf_put_he64(dst + 8, s[1]);
+    }
+}
+
+
+/* Optimized function for cipher block xoring */
+static inline void
+cipher_block_xor(void *_dst, const void *_src1, const void *_src2,
+                 size_t blocksize)
+{
+  byte *dst = _dst;
+  const byte *src1 = _src1;
+  const byte *src2 = _src2;
+  u64 s1[2];
+  u64 s2[2];
+
+  if (blocksize == 8)
+    {
+      buf_put_he64(dst + 0, buf_get_he64(src1 + 0) ^ buf_get_he64(src2 + 0));
+    }
+  else /* blocksize == 16 */
+    {
+      s1[0] = buf_get_he64(src1 + 0);
+      s1[1] = buf_get_he64(src1 + 8);
+      s2[0] = buf_get_he64(src2 + 0);
+      s2[1] = buf_get_he64(src2 + 8);
+      buf_put_he64(dst + 0, s1[0] ^ s2[0]);
+      buf_put_he64(dst + 8, s1[1] ^ s2[1]);
+    }
+}
+
+
+/* Optimized function for in-place cipher block xoring */
+static inline void
+cipher_block_xor_1(void *_dst, const void *_src, size_t blocksize)
+{
+  cipher_block_xor (_dst, _dst, _src, blocksize);
+}
+
+
+/* Optimized function for cipher block xoring with two destination cipher
+   blocks.  Used mainly by CFB mode encryption.  */
+static inline void
+cipher_block_xor_2dst(void *_dst1, void *_dst2, const void *_src,
+                      size_t blocksize)
+{
+  byte *dst1 = _dst1;
+  byte *dst2 = _dst2;
+  const byte *src = _src;
+  u64 d2[2];
+  u64 s[2];
+
+  if (blocksize == 8)
+    {
+      d2[0] = buf_get_he64(dst2 + 0) ^ buf_get_he64(src + 0);
+      buf_put_he64(dst2 + 0, d2[0]);
+      buf_put_he64(dst1 + 0, d2[0]);
+    }
+  else /* blocksize == 16 */
+    {
+      s[0] = buf_get_he64(src + 0);
+      s[1] = buf_get_he64(src + 8);
+      d2[0] = buf_get_he64(dst2 + 0);
+      d2[1] = buf_get_he64(dst2 + 8);
+      d2[0] = d2[0] ^ s[0];
+      d2[1] = d2[1] ^ s[1];
+      buf_put_he64(dst2 + 0, d2[0]);
+      buf_put_he64(dst2 + 8, d2[1]);
+      buf_put_he64(dst1 + 0, d2[0]);
+      buf_put_he64(dst1 + 8, d2[1]);
+    }
+}
+
+
+/* Optimized function for combined cipher block xoring and copying.
+   Used mainly by CBC mode decryption.  */
+static inline void
+cipher_block_xor_n_copy_2(void *_dst_xor, const void *_src_xor,
+                          void *_srcdst_cpy, const void *_src_cpy,
+                          size_t blocksize)
+{
+  byte *dst_xor = _dst_xor;
+  byte *srcdst_cpy = _srcdst_cpy;
+  const byte *src_xor = _src_xor;
+  const byte *src_cpy = _src_cpy;
+  u64 sc[2];
+  u64 sx[2];
+  u64 sdc[2];
+
+  if (blocksize == 8)
+    {
+      sc[0] = buf_get_he64(src_cpy + 0);
+      buf_put_he64(dst_xor + 0,
+                   buf_get_he64(srcdst_cpy + 0) ^ buf_get_he64(src_xor + 0));
+      buf_put_he64(srcdst_cpy + 0, sc[0]);
+    }
+  else /* blocksize == 16 */
+    {
+      sc[0] = buf_get_he64(src_cpy + 0);
+      sc[1] = buf_get_he64(src_cpy + 8);
+      sx[0] = buf_get_he64(src_xor + 0);
+      sx[1] = buf_get_he64(src_xor + 8);
+      sdc[0] = buf_get_he64(srcdst_cpy + 0);
+      sdc[1] = buf_get_he64(srcdst_cpy + 8);
+      sx[0] ^= sdc[0];
+      sx[1] ^= sdc[1];
+      buf_put_he64(dst_xor + 0, sx[0]);
+      buf_put_he64(dst_xor + 8, sx[1]);
+      buf_put_he64(srcdst_cpy + 0, sc[0]);
+      buf_put_he64(srcdst_cpy + 8, sc[1]);
+    }
+}
+
+
+/* Optimized function for combined cipher block xoring and copying.
+   Used mainly by CFB mode decryption.  */
+static inline void
+cipher_block_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src,
+                        size_t blocksize)
+{
+  cipher_block_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, blocksize);
+}
+
+
 #endif /*G10_CIPHER_INTERNAL_H*/
diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c
index db42aaf16..f71520ad2 100644
--- a/cipher/cipher-ocb.c
+++ b/cipher/cipher-ocb.c
@@ -82,7 +82,7 @@ static void
 double_block_cpy (unsigned char *d, const unsigned char *s)
 {
   if (d != s)
-    buf_cpy (d, s, OCB_BLOCK_LEN);
+    cipher_block_cpy (d, s, OCB_BLOCK_LEN);
   double_block (d);
 }
 
@@ -181,8 +181,8 @@ _gcry_cipher_ocb_set_nonce (gcry_cipher_hd_t c, const unsigned char *nonce,
   nburn = c->spec->encrypt (&c->context.c, ktop, ktop);
   burn = nburn > burn ? nburn : burn;
   /* Stretch = Ktop || (Ktop[1..64] xor Ktop[9..72]) */
-  buf_cpy (stretch, ktop, OCB_BLOCK_LEN);
-  buf_xor (stretch + OCB_BLOCK_LEN, ktop, ktop + 1, 8);
+  cipher_block_cpy (stretch, ktop, OCB_BLOCK_LEN);
+  cipher_block_xor (stretch + OCB_BLOCK_LEN, ktop, ktop + 1, 8);
   /* Offset_0 = Stretch[1+bottom..128+bottom]
      (We use the IV field to store the offset) */
   bit_copy (c->u_iv.iv, stretch, bottom, OCB_BLOCK_LEN);
@@ -267,18 +267,18 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
             }
           else
             {
-              buf_cpy (l_tmp, ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
-                       OCB_BLOCK_LEN);
+              cipher_block_cpy (l_tmp, ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+                                OCB_BLOCK_LEN);
             }
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
           /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
-          buf_xor (l_tmp, c->u_mode.ocb.aad_offset,
-                   c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN);
+          cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset,
+                            c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN);
           nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
           burn = nburn > burn ? nburn : burn;
-          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
           c->u_mode.ocb.aad_nleftover = 0;
         }
@@ -309,12 +309,13 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
           ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks, l_tmp);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
           /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
-          buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
+          cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf,
+                            OCB_BLOCK_LEN);
           nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
           burn = nburn > burn ? nburn : burn;
-          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
           abuf += OCB_BLOCK_LEN;
           abuflen -= OCB_BLOCK_LEN;
@@ -349,14 +350,15 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
           gcry_assert(c->u_mode.ocb.aad_nblocks & table_size_mask);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_mode.ocb.aad_offset,
-                     ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
-                     OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset,
+                              ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+                              OCB_BLOCK_LEN);
           /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
-          buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
+          cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf,
+                            OCB_BLOCK_LEN);
           nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
           burn = nburn > burn ? nburn : burn;
-          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
           abuf += OCB_BLOCK_LEN;
           abuflen -= OCB_BLOCK_LEN;
@@ -397,18 +399,18 @@ ocb_aad_finalize (gcry_cipher_hd_t c)
   if (c->u_mode.ocb.aad_nleftover)
     {
       /* Offset_* = Offset_m xor L_*  */
-      buf_xor_1 (c->u_mode.ocb.aad_offset,
-                 c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
+      cipher_block_xor_1 (c->u_mode.ocb.aad_offset,
+                          c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
       /* CipherInput = (A_* || 1 || zeros(127-bitlen(A_*))) xor Offset_*  */
       buf_cpy (l_tmp, c->u_mode.ocb.aad_leftover, c->u_mode.ocb.aad_nleftover);
       memset (l_tmp + c->u_mode.ocb.aad_nleftover, 0,
               OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover);
       l_tmp[c->u_mode.ocb.aad_nleftover] = 0x80;
-      buf_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN);
+      cipher_block_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN);
       /* Sum = Sum_m xor ENCIPHER(K, CipherInput)  */
       nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
       burn = nburn > burn ? nburn : burn;
-      buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+      cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
       c->u_mode.ocb.aad_nleftover = 0;
     }
@@ -431,7 +433,7 @@ ocb_checksum (unsigned char *chksum, const unsigned char *plainbuf,
   while (nblks > 0)
     {
       /* Checksum_i = Checksum_{i-1} xor P_i  */
-      buf_xor_1(chksum, plainbuf, OCB_BLOCK_LEN);
+      cipher_block_xor_1(chksum, plainbuf, OCB_BLOCK_LEN);
 
       plainbuf += OCB_BLOCK_LEN;
       nblks--;
@@ -491,12 +493,12 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
             }
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN);
           /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
-          buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+          cipher_block_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
           nburn = crypt_fn (&c->context.c, outbuf, outbuf);
           burn = nburn > burn ? nburn : burn;
-          buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
 
           if (!encrypt)
             {
@@ -551,14 +553,14 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
               gcry_assert(c->u_mode.ocb.data_nblocks & table_size_mask);
 
               /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-              buf_xor_1 (c->u_iv.iv,
-                         ocb_get_l (c, c->u_mode.ocb.data_nblocks),
-                         OCB_BLOCK_LEN);
+              cipher_block_xor_1 (c->u_iv.iv,
+                                  ocb_get_l (c, c->u_mode.ocb.data_nblocks),
+                                  OCB_BLOCK_LEN);
               /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
-              buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+              cipher_block_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
               nburn = crypt_fn (&c->context.c, outbuf, outbuf);
               burn = nburn > burn ? nburn : burn;
-              buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+              cipher_block_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
 
               inbuf += OCB_BLOCK_LEN;
               inbuflen -= OCB_BLOCK_LEN;
@@ -584,7 +586,7 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
       unsigned char pad[OCB_BLOCK_LEN];
 
       /* Offset_* = Offset_m xor L_*  */
-      buf_xor_1 (c->u_iv.iv, c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
+      cipher_block_xor_1 (c->u_iv.iv, c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
       /* Pad = ENCIPHER(K, Offset_*) */
       nburn = c->spec->encrypt (&c->context.c, pad, c->u_iv.iv);
       burn = nburn > burn ? nburn : burn;
@@ -596,7 +598,7 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
           buf_cpy (l_tmp, inbuf, inbuflen);
           memset (l_tmp + inbuflen, 0, OCB_BLOCK_LEN - inbuflen);
           l_tmp[inbuflen] = 0x80;
-          buf_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
           /* C_* = P_* xor Pad[1..bitlen(P_*)] */
           buf_xor (outbuf, inbuf, pad, inbuflen);
         }
@@ -604,13 +606,13 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
         {
           /* P_* = C_* xor Pad[1..bitlen(C_*)] */
           /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */
-          buf_cpy (l_tmp, pad, OCB_BLOCK_LEN);
+          cipher_block_cpy (l_tmp, pad, OCB_BLOCK_LEN);
           buf_cpy (l_tmp, inbuf, inbuflen);
-          buf_xor_1 (l_tmp, pad, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (l_tmp, pad, OCB_BLOCK_LEN);
           l_tmp[inbuflen] = 0x80;
           buf_cpy (outbuf, l_tmp, inbuflen);
 
-          buf_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
         }
     }
 
@@ -618,8 +620,10 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
   if (c->marks.finalize)
     {
       /* Tag = ENCIPHER(K, Checksum xor Offset xor L_$) xor HASH(K,A) */
-      buf_xor (c->u_mode.ocb.tag, c->u_ctr.ctr, c->u_iv.iv, OCB_BLOCK_LEN);
-      buf_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.L_dollar, OCB_BLOCK_LEN);
+      cipher_block_xor (c->u_mode.ocb.tag, c->u_ctr.ctr, c->u_iv.iv,
+                        OCB_BLOCK_LEN);
+      cipher_block_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.L_dollar,
+                          OCB_BLOCK_LEN);
       nburn = c->spec->encrypt (&c->context.c,
                                 c->u_mode.ocb.tag, c->u_mode.ocb.tag);
       burn = nburn > burn ? nburn : burn;
@@ -672,7 +676,8 @@ compute_tag_if_needed (gcry_cipher_hd_t c)
   if (!c->marks.tag)
     {
       ocb_aad_finalize (c);
-      buf_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.aad_sum, OCB_BLOCK_LEN);
+      cipher_block_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.aad_sum,
+                          OCB_BLOCK_LEN);
       c->marks.tag = 1;
     }
 }
diff --git a/cipher/cipher-ofb.c b/cipher/cipher-ofb.c
index 419a8d085..a3fd2993d 100644
--- a/cipher/cipher-ofb.c
+++ b/cipher/cipher-ofb.c
@@ -72,7 +72,7 @@ _gcry_cipher_ofb_encrypt (gcry_cipher_hd_t c,
       /* Encrypt the IV (and save the current one). */
       nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
       burn = nburn > burn ? nburn : burn;
-      buf_xor(outbuf, c->u_iv.iv, inbuf, blocksize);
+      cipher_block_xor(outbuf, c->u_iv.iv, inbuf, blocksize);
       outbuf += blocksize;
       inbuf += blocksize;
       inbuflen -= blocksize;
diff --git a/cipher/cipher-xts.c b/cipher/cipher-xts.c
index 045b3539b..0522a271a 100644
--- a/cipher/cipher-xts.c
+++ b/cipher/cipher-xts.c
@@ -107,10 +107,10 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t c,
   while (nblocks)
     {
       /* Xor-Encrypt/Decrypt-Xor block. */
-      buf_xor (tmp.x64, inbuf, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+      cipher_block_xor (tmp.x64, inbuf, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
       nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1);
       burn = nburn > burn ? nburn : burn;
-      buf_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+      cipher_block_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
 
       outbuf += GCRY_XTS_BLOCK_LEN;
       inbuf += GCRY_XTS_BLOCK_LEN;
@@ -133,10 +133,10 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t c,
 	  xts_gfmul_byA (tmp.x1, c->u_ctr.ctr);
 
 	  /* Decrypt last block first. */
-	  buf_xor (outbuf, inbuf, tmp.x64, GCRY_XTS_BLOCK_LEN);
+	  cipher_block_xor (outbuf, inbuf, tmp.x64, GCRY_XTS_BLOCK_LEN);
 	  nburn = crypt_fn (&c->context.c, outbuf, outbuf);
 	  burn = nburn > burn ? nburn : burn;
-	  buf_xor (outbuf, outbuf, tmp.x64, GCRY_XTS_BLOCK_LEN);
+	  cipher_block_xor (outbuf, outbuf, tmp.x64, GCRY_XTS_BLOCK_LEN);
 
 	  inbuflen -= GCRY_XTS_BLOCK_LEN;
 	  inbuf += GCRY_XTS_BLOCK_LEN;
@@ -147,15 +147,15 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t c,
       outbuf -= GCRY_XTS_BLOCK_LEN;
 
       /* Steal ciphertext from previous block. */
-      buf_cpy (tmp.x64, outbuf, GCRY_XTS_BLOCK_LEN);
+      cipher_block_cpy (tmp.x64, outbuf, GCRY_XTS_BLOCK_LEN);
       buf_cpy (tmp.x64, inbuf, inbuflen);
       buf_cpy (outbuf + GCRY_XTS_BLOCK_LEN, outbuf, inbuflen);
 
       /* Decrypt/Encrypt last block. */
-      buf_xor (tmp.x64, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+      cipher_block_xor (tmp.x64, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
       nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1);
       burn = nburn > burn ? nburn : burn;
-      buf_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+      cipher_block_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
     }
 
   /* Auto-increment data-unit sequence number */
diff --git a/cipher/des.c b/cipher/des.c
index 05092277e..a008b93e5 100644
--- a/cipher/des.c
+++ b/cipher/des.c
@@ -119,6 +119,7 @@
 #include "g10lib.h"
 #include "cipher.h"
 #include "bufhelp.h"
+#include "cipher-internal.h"
 #include "cipher-selftest.h"
 
 
@@ -908,7 +909,7 @@ _gcry_3des_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
       /* Encrypt the counter. */
       tripledes_ecb_encrypt (ctx, ctr, tmpbuf);
       /* XOR the input with the encrypted counter and store in output.  */
-      buf_xor(outbuf, tmpbuf, inbuf, DES_BLOCKSIZE);
+      cipher_block_xor(outbuf, tmpbuf, inbuf, DES_BLOCKSIZE);
       outbuf += DES_BLOCKSIZE;
       inbuf  += DES_BLOCKSIZE;
       /* Increment the counter.  */
@@ -964,7 +965,7 @@ _gcry_3des_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
          the intermediate result to SAVEBUF.  */
       tripledes_ecb_decrypt (ctx, inbuf, savebuf);
 
-      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, DES_BLOCKSIZE);
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, DES_BLOCKSIZE);
       inbuf += DES_BLOCKSIZE;
       outbuf += DES_BLOCKSIZE;
     }
@@ -1009,7 +1010,7 @@ _gcry_3des_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
   for ( ;nblocks; nblocks-- )
     {
       tripledes_ecb_encrypt (ctx, iv, iv);
-      buf_xor_n_copy(outbuf, iv, inbuf, DES_BLOCKSIZE);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, DES_BLOCKSIZE);
       outbuf += DES_BLOCKSIZE;
       inbuf  += DES_BLOCKSIZE;
     }
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index d3fcb76f3..d126f8847 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -831,7 +831,7 @@ _gcry_aes_cfb_enc (void *context, unsigned char *iv,
           /* Encrypt the IV. */
           burn_depth = encrypt_fn (ctx, iv, iv);
           /* XOR the input with the IV and store input into IV.  */
-          buf_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE);
+          cipher_block_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE);
           outbuf += BLOCKSIZE;
           inbuf  += BLOCKSIZE;
         }
@@ -891,7 +891,7 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv,
 
       for ( ;nblocks; nblocks-- )
         {
-          buf_xor(outbuf, inbuf, last_iv, BLOCKSIZE);
+          cipher_block_xor(outbuf, inbuf, last_iv, BLOCKSIZE);
 
           burn_depth = encrypt_fn (ctx, outbuf, outbuf);
 
@@ -902,7 +902,7 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv,
         }
 
       if (last_iv != iv)
-        buf_cpy (iv, last_iv, BLOCKSIZE);
+        cipher_block_cpy (iv, last_iv, BLOCKSIZE);
     }
 
   if (burn_depth)
@@ -962,7 +962,7 @@ _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
           /* Encrypt the counter. */
           burn_depth = encrypt_fn (ctx, tmp.x1, ctr);
           /* XOR the input with the encrypted counter and store in output.  */
-          buf_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE);
+          cipher_block_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE);
           outbuf += BLOCKSIZE;
           inbuf  += BLOCKSIZE;
           /* Increment the counter.  */
@@ -1207,7 +1207,7 @@ _gcry_aes_cfb_dec (void *context, unsigned char *iv,
       for ( ;nblocks; nblocks-- )
         {
           burn_depth = encrypt_fn (ctx, iv, iv);
-          buf_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE);
+          cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE);
           outbuf += BLOCKSIZE;
           inbuf  += BLOCKSIZE;
         }
@@ -1272,7 +1272,7 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv,
 
           burn_depth = decrypt_fn (ctx, savebuf, inbuf);
 
-          buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE);
+          cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE);
           inbuf += BLOCKSIZE;
           outbuf += BLOCKSIZE;
         }
@@ -1330,15 +1330,15 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
           const unsigned char *l = ocb_get_l(c, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
-          buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
+          cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
+          cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
           /* Checksum_i = Checksum_{i-1} xor P_i  */
-          buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
           /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
-          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
           burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
-          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
-          buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
+          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
 
           inbuf += BLOCKSIZE;
           outbuf += BLOCKSIZE;
@@ -1360,15 +1360,15 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
           const unsigned char *l = ocb_get_l(c, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
-          buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
+          cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
+          cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
           /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
-          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
           burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
-          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
           /* Checksum_i = Checksum_{i-1} xor P_i  */
-          buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
-          buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
+          cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
 
           inbuf += BLOCKSIZE;
           outbuf += BLOCKSIZE;
@@ -1424,11 +1424,12 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
           const unsigned char *l = ocb_get_l(c, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
           /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
-          buf_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf, BLOCKSIZE);
+          cipher_block_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf,
+                            BLOCKSIZE);
           burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
-          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE);
 
           abuf += BLOCKSIZE;
         }
diff --git a/cipher/serpent.c b/cipher/serpent.c
index 0736ad195..8e3faa7c5 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -1002,7 +1002,7 @@ _gcry_serpent_ctr_enc(void *context, unsigned char *ctr,
       /* Encrypt the counter. */
       serpent_encrypt_internal(ctx, ctr, tmpbuf);
       /* XOR the input with the encrypted counter and store in output.  */
-      buf_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
+      cipher_block_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
       outbuf += sizeof(serpent_block_t);
       inbuf  += sizeof(serpent_block_t);
       /* Increment the counter.  */
@@ -1117,7 +1117,8 @@ _gcry_serpent_cbc_dec(void *context, unsigned char *iv,
          the intermediate result to SAVEBUF.  */
       serpent_decrypt_internal (ctx, inbuf, savebuf);
 
-      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, sizeof(serpent_block_t));
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf,
+                                sizeof(serpent_block_t));
       inbuf += sizeof(serpent_block_t);
       outbuf += sizeof(serpent_block_t);
     }
@@ -1221,7 +1222,7 @@ _gcry_serpent_cfb_dec(void *context, unsigned char *iv,
   for ( ;nblocks; nblocks-- )
     {
       serpent_encrypt_internal(ctx, iv, iv);
-      buf_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t));
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t));
       outbuf += sizeof(serpent_block_t);
       inbuf  += sizeof(serpent_block_t);
     }
diff --git a/cipher/twofish.c b/cipher/twofish.c
index 0d187bda4..51982c530 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -1161,7 +1161,7 @@ _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
         burn_stack_depth = burn;
 
       /* XOR the input with the encrypted counter and store in output.  */
-      buf_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE);
+      cipher_block_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE);
       outbuf += TWOFISH_BLOCKSIZE;
       inbuf  += TWOFISH_BLOCKSIZE;
       /* Increment the counter.  */
@@ -1243,7 +1243,7 @@ _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
       if (burn > burn_stack_depth)
         burn_stack_depth = burn;
 
-      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE);
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE);
       inbuf += TWOFISH_BLOCKSIZE;
       outbuf += TWOFISH_BLOCKSIZE;
     }
@@ -1315,7 +1315,7 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
       if (burn > burn_stack_depth)
         burn_stack_depth = burn;
 
-      buf_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE);
       outbuf += TWOFISH_BLOCKSIZE;
       inbuf += TWOFISH_BLOCKSIZE;
     }
-- 
2.17.1



