[PATCH] Optimizations for AES-NI OCB
Jussi Kivilinna
jussi.kivilinna at iki.fi
Sun Nov 11 14:41:09 CET 2018
* cipher/cipher-ocb.c (ocb_crypt): Process input in 24KiB chunks for
better cache locality when checksumming.
* cipher/rijndael-aesni.c (ALWAYS_INLINE): New macro for always
inlining functions, change all functions with 'inline' to use
ALWAYS_INLINE.
(NO_INLINE): New macro.
(aesni_prepare_2_6_variable, aesni_prepare_7_15_variable): Rename to...
(aesni_prepare_2_7_variable, aesni_prepare_8_15_variable): ...these and
adjust accordingly (xmm7 moved from *_7_15 to *_2_7).
(aesni_prepare_2_6, aesni_prepare_7_15): Rename to...
(aesni_prepare_2_7, aesni_prepare_8_15): ...these and adjust
accordingly.
(aesni_cleanup_2_6, aesni_cleanup_7_15): Rename to...
(aesni_cleanup_2_7, aesni_cleanup_8_15): ...these and adjust
accordingly.
(aesni_ocb_checksum): New.
(aesni_ocb_enc, aesni_ocb_dec): Calculate OCB offsets in parallel
with help of precalculated offsets L0+L1 and L0+L1+L0; Do checksum
calculation as separate pass instead of inline; Use NO_INLINE.
* cipher/rijndael-internal.h (RIJNDAEL_context_s) [USE_AESNI]: Add
'use_avx2'.
* cipher/rijndael.c (do_setkey) [USE_AESNI]: Set 'use_avx2' if
Intel AVX2 HW feature is available.
* tests/basic.c (do_check_ocb_cipher): New test vector; increase
size of temporary buffers for new test vector.
(check_ocb_cipher_largebuf_split): Make test plaintext non-uniform
for better checksum testing.
(check_ocb_cipher_checksum): New.
(check_ocb_cipher_largebuf): Call check_ocb_cipher_checksum.
(check_ocb_cipher): New expected tags for check_ocb_cipher_largebuf
test runs.
--
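To spell out the offset trick from the changelog above: OCB chains
Offset_i = Offset_{i-1} xor L_{ntz(i)}, which serializes every offset
through a single register. Within a group of four blocks, the first
three deltas from Offset_{4n} are the constants L0, L0+L1 and
L0+L1+L0, so precalculating the latter two lets all offsets of the
group derive directly from Offset_{4n}. A plain-C sketch of the idea
(helper names are illustrative only, not the actual SSE code in
rijndael-aesni.c):

  typedef struct { unsigned char b[16]; } ocb_block_t;

  static void
  xor_block (ocb_block_t *r, const ocb_block_t *a, const ocb_block_t *b)
  {
    int i;
    for (i = 0; i < 16; i++)
      r->b[i] = a->b[i] ^ b->b[i];
  }

  /* Offsets for blocks 4n+1..4n+4, given Offset_{4n} in 'offset' and
   * the precalculated l0l1 = L0+L1 and l0l1l0 = L0+L1+L0.  The first
   * three xors depend only on 'offset' and can execute in parallel;
   * only the last one needs the block-index dependent L_{ntz(4n+4)}. */
  static void
  four_offsets (ocb_block_t o[4], const ocb_block_t *offset,
                const ocb_block_t *l0, const ocb_block_t *l0l1,
                const ocb_block_t *l0l1l0, const ocb_block_t *l_ntz)
  {
    xor_block (&o[0], offset, l0);      /* Offset_{4n+1} */
    xor_block (&o[1], offset, l0l1);    /* Offset_{4n+2} */
    xor_block (&o[2], offset, l0l1l0);  /* Offset_{4n+3} */
    xor_block (&o[3], &o[2], l_ntz);    /* Offset_{4n+4} */
  }

The assembly versions keep l0l1/l0l1l0 in a 16-byte-aligned stack
buffer so they can be reloaded with movdqa inside the 8-block loops.
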
Benchmark on Haswell i7-4790K @ 4.0 GHz:
Before:
AES | nanosecs/byte mebibytes/sec cycles/byte
OCB enc | 0.175 ns/B 5436 MiB/s 0.702 c/B
OCB dec | 0.184 ns/B 5184 MiB/s 0.736 c/B
After (enc +2% faster, dec +7% faster):
OCB enc | 0.172 ns/B 5557 MiB/s 0.686 c/B
OCB dec | 0.171 ns/B 5572 MiB/s 0.685 c/B
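
The 24KiB chunking in cipher-ocb.c ties into the checksum change:
since aesni_ocb_checksum now runs as a separate pass over the same
buffer, bounding each bulk call keeps that buffer cache-hot between
the two passes (24KiB leaves headroom in a typical 32KiB L1D). A
minimal caller-side sketch, assuming a hypothetical bulk_ocb_crypt()
helper rather than the actual libgcrypt entry points:

  #include <stddef.h>

  #define OCB_BLOCK_LEN 16

  /* Hypothetical bulk routine: one checksum pass plus one cipher pass
   * over 'nblks' blocks. */
  extern void bulk_ocb_crypt (void *hd, unsigned char *out,
                              const unsigned char *in, size_t nblks);

  static void
  crypt_in_chunks (void *hd, unsigned char *out, const unsigned char *in,
                   size_t nblks_total)
  {
    const size_t max_blks = 24 * 1024 / OCB_BLOCK_LEN;

    while (nblks_total)
      {
        size_t nblks = nblks_total < max_blks ? nblks_total : max_blks;

        bulk_ocb_crypt (hd, out, in, nblks);
        in  += nblks * OCB_BLOCK_LEN;
        out += nblks * OCB_BLOCK_LEN;
        nblks_total -= nblks;
      }
  }
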
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
5 files changed
diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c
index f71520ad2..cb6afd2b5 100644
--- a/cipher/cipher-ocb.c
+++ b/cipher/cipher-ocb.c
@@ -519,6 +519,12 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
nblks = nblks < nmaxblks ? nblks : nmaxblks;
+ /* Since checksum xoring is done before/after encryption/decryption,
+ process input in 24KiB chunks to keep data loaded in L1 cache for
+ checksumming. */
+ if (nblks > 24 * 1024 / OCB_BLOCK_LEN)
+ nblks = 24 * 1024 / OCB_BLOCK_LEN;
+
/* Use a bulk method if available. */
if (nblks && c->bulk.ocb_crypt)
{
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index d190c0ac4..081bf124c 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -41,6 +41,10 @@
#endif
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+
+
typedef struct u128_s
{
u32 a, b, c, d;
@@ -49,7 +53,7 @@ typedef struct u128_s
/* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l
because of 'pragma target'. */
-static inline const unsigned char *
+static ALWAYS_INLINE const unsigned char *
aes_ocb_get_l (gcry_cipher_hd_t c, u64 n)
{
unsigned long ntz;
@@ -71,78 +75,78 @@ aes_ocb_get_l (gcry_cipher_hd_t c, u64 n)
the key or the data. */
#ifdef __WIN64__
/* XMM6-XMM15 are callee-saved registers on WIN64. */
-# define aesni_prepare_2_6_variable char win64tmp[16]
-# define aesni_prepare_7_15_variable char win64tmp7_15[16 * 9]
+# define aesni_prepare_2_7_variable char win64tmp[16 * 2]
+# define aesni_prepare_8_15_variable char win64tmp8_15[16 * 8]
# define aesni_prepare() do { } while (0)
-# define aesni_prepare_2_6() \
+# define aesni_prepare_2_7() \
do { asm volatile ("movdqu %%xmm6, %0\n\t" \
- : "=m" (*win64tmp) \
+ "movdqu %%xmm7, %1\n\t" \
+ : "=m" (*win64tmp), "=m" (*(win64tmp+16)) \
: \
: "memory"); \
} while (0)
-# define aesni_prepare_7_15() \
- do { asm volatile ("movdqu %%xmm7, 0*16(%0)\n\t" \
- "movdqu %%xmm8, 1*16(%0)\n\t" \
- "movdqu %%xmm9, 2*16(%0)\n\t" \
- "movdqu %%xmm10, 3*16(%0)\n\t" \
- "movdqu %%xmm11, 4*16(%0)\n\t" \
- "movdqu %%xmm12, 5*16(%0)\n\t" \
- "movdqu %%xmm13, 6*16(%0)\n\t" \
- "movdqu %%xmm14, 7*16(%0)\n\t" \
- "movdqu %%xmm15, 8*16(%0)\n\t" \
+# define aesni_prepare_8_15() \
+ do { asm volatile ("movdqu %%xmm8, 0*16(%0)\n\t" \
+ "movdqu %%xmm9, 1*16(%0)\n\t" \
+ "movdqu %%xmm10, 2*16(%0)\n\t" \
+ "movdqu %%xmm11, 3*16(%0)\n\t" \
+ "movdqu %%xmm12, 4*16(%0)\n\t" \
+ "movdqu %%xmm13, 5*16(%0)\n\t" \
+ "movdqu %%xmm14, 6*16(%0)\n\t" \
+ "movdqu %%xmm15, 7*16(%0)\n\t" \
: \
- : "r" (win64tmp7_15) \
+ : "r" (win64tmp8_15) \
: "memory"); \
} while (0)
# define aesni_cleanup() \
do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \
"pxor %%xmm1, %%xmm1\n" :: ); \
} while (0)
-# define aesni_cleanup_2_6() \
+# define aesni_cleanup_2_7() \
do { asm volatile ("movdqu %0, %%xmm6\n\t" \
+ "movdqu %1, %%xmm7\n\t" \
"pxor %%xmm2, %%xmm2\n" \
"pxor %%xmm3, %%xmm3\n" \
"pxor %%xmm4, %%xmm4\n" \
"pxor %%xmm5, %%xmm5\n" \
: \
- : "m" (*win64tmp) \
+ : "m" (*win64tmp), "m" (*(win64tmp+16)) \
: "memory"); \
} while (0)
-# define aesni_cleanup_7_15() \
- do { asm volatile ("movdqu 0*16(%0), %%xmm7\n\t" \
- "movdqu 1*16(%0), %%xmm8\n\t" \
- "movdqu 2*16(%0), %%xmm9\n\t" \
- "movdqu 3*16(%0), %%xmm10\n\t" \
- "movdqu 4*16(%0), %%xmm11\n\t" \
- "movdqu 5*16(%0), %%xmm12\n\t" \
- "movdqu 6*16(%0), %%xmm13\n\t" \
- "movdqu 7*16(%0), %%xmm14\n\t" \
- "movdqu 8*16(%0), %%xmm15\n\t" \
+# define aesni_cleanup_8_15() \
+ do { asm volatile ("movdqu 0*16(%0), %%xmm8\n\t" \
+ "movdqu 1*16(%0), %%xmm9\n\t" \
+ "movdqu 2*16(%0), %%xmm10\n\t" \
+ "movdqu 3*16(%0), %%xmm11\n\t" \
+ "movdqu 4*16(%0), %%xmm12\n\t" \
+ "movdqu 5*16(%0), %%xmm13\n\t" \
+ "movdqu 6*16(%0), %%xmm14\n\t" \
+ "movdqu 7*16(%0), %%xmm15\n\t" \
: \
- : "r" (win64tmp7_15) \
+ : "r" (win64tmp8_15) \
: "memory"); \
} while (0)
#else
-# define aesni_prepare_2_6_variable
+# define aesni_prepare_2_7_variable
# define aesni_prepare() do { } while (0)
-# define aesni_prepare_2_6() do { } while (0)
+# define aesni_prepare_2_7() do { } while (0)
# define aesni_cleanup() \
do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \
"pxor %%xmm1, %%xmm1\n" :: ); \
} while (0)
-# define aesni_cleanup_2_6() \
- do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \
+# define aesni_cleanup_2_7() \
+ do { asm volatile ("pxor %%xmm7, %%xmm7\n\t" \
+ "pxor %%xmm2, %%xmm2\n\t" \
"pxor %%xmm3, %%xmm3\n" \
"pxor %%xmm4, %%xmm4\n" \
"pxor %%xmm5, %%xmm5\n" \
"pxor %%xmm6, %%xmm6\n":: ); \
} while (0)
# ifdef __x86_64__
-# define aesni_prepare_7_15_variable
-# define aesni_prepare_7_15() do { } while (0)
-# define aesni_cleanup_7_15() \
- do { asm volatile ("pxor %%xmm7, %%xmm7\n\t" \
- "pxor %%xmm8, %%xmm8\n" \
+# define aesni_prepare_8_15_variable
+# define aesni_prepare_8_15() do { } while (0)
+# define aesni_cleanup_8_15() \
+ do { asm volatile ("pxor %%xmm8, %%xmm8\n" \
"pxor %%xmm9, %%xmm9\n" \
"pxor %%xmm10, %%xmm10\n" \
"pxor %%xmm11, %%xmm11\n" \
@@ -157,10 +161,10 @@ aes_ocb_get_l (gcry_cipher_hd_t c, u64 n)
void
_gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key)
{
- aesni_prepare_2_6_variable;
+ aesni_prepare_2_7_variable;
aesni_prepare();
- aesni_prepare_2_6();
+ aesni_prepare_2_7();
if (ctx->rounds < 12)
{
@@ -383,12 +387,12 @@ _gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key)
}
aesni_cleanup();
- aesni_cleanup_2_6();
+ aesni_cleanup_2_7();
}
/* Make a decryption key from an encryption key. */
-static inline void
+static ALWAYS_INLINE void
do_aesni_prepare_decryption (RIJNDAEL_context *ctx)
{
/* The AES-NI decrypt instructions use the Equivalent Inverse
@@ -447,7 +451,7 @@ _gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx)
/* Encrypt one block using the Intel AES-NI instructions. Block is input
* and output through SSE register xmm0. */
-static inline void
+static ALWAYS_INLINE void
do_aesni_enc (const RIJNDAEL_context *ctx)
{
#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
@@ -500,7 +504,7 @@ do_aesni_enc (const RIJNDAEL_context *ctx)
/* Decrypt one block using the Intel AES-NI instructions. Block is input
* and output through SSE register xmm0. */
-static inline void
+static ALWAYS_INLINE void
do_aesni_dec (const RIJNDAEL_context *ctx)
{
#define aesdec_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t"
@@ -553,7 +557,7 @@ do_aesni_dec (const RIJNDAEL_context *ctx)
/* Encrypt four blocks using the Intel AES-NI instructions. Blocks are input
* and output through SSE registers xmm1 to xmm4. */
-static inline void
+static ALWAYS_INLINE void
do_aesni_enc_vec4 (const RIJNDAEL_context *ctx)
{
#define aesenc_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc8\n\t"
@@ -662,7 +666,7 @@ do_aesni_enc_vec4 (const RIJNDAEL_context *ctx)
/* Decrypt four blocks using the Intel AES-NI instructions. Blocks are input
* and output through SSE registers xmm1 to xmm4. */
-static inline void
+static ALWAYS_INLINE void
do_aesni_dec_vec4 (const RIJNDAEL_context *ctx)
{
#define aesdec_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc8\n\t"
@@ -773,7 +777,7 @@ do_aesni_dec_vec4 (const RIJNDAEL_context *ctx)
/* Encrypt eight blocks using the Intel AES-NI instructions. Blocks are input
* and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11. */
-static inline void
+static ALWAYS_INLINE void
do_aesni_enc_vec8 (const RIJNDAEL_context *ctx)
{
asm volatile ("movdqa (%[key]), %%xmm0\n\t"
@@ -925,7 +929,7 @@ do_aesni_enc_vec8 (const RIJNDAEL_context *ctx)
/* Decrypt eight blocks using the Intel AES-NI instructions. Blocks are input
* and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11. */
-static inline void
+static ALWAYS_INLINE void
do_aesni_dec_vec8 (const RIJNDAEL_context *ctx)
{
asm volatile ("movdqa (%[key]), %%xmm0\n\t"
@@ -1757,10 +1761,10 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv,
unsigned char *outbuf, const unsigned char *inbuf,
size_t nblocks, int cbc_mac)
{
- aesni_prepare_2_6_variable;
+ aesni_prepare_2_7_variable;
aesni_prepare ();
- aesni_prepare_2_6();
+ aesni_prepare_2_7();
asm volatile ("movdqu %[iv], %%xmm5\n\t"
: /* No output */
@@ -1794,7 +1798,7 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv,
: "memory" );
aesni_cleanup ();
- aesni_cleanup_2_6 ();
+ aesni_cleanup_2_7 ();
}
@@ -1805,10 +1809,10 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
{
static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
{ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
- aesni_prepare_2_6_variable;
+ aesni_prepare_2_7_variable;
aesni_prepare ();
- aesni_prepare_2_6();
+ aesni_prepare_2_7();
asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */
"movdqa %[ctr], %%xmm5\n\t" /* Preload CTR */
@@ -1820,9 +1824,9 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
#ifdef __x86_64__
if (nblocks >= 8)
{
- aesni_prepare_7_15_variable;
+ aesni_prepare_8_15_variable;
- aesni_prepare_7_15();
+ aesni_prepare_8_15();
for ( ;nblocks >= 8 ; nblocks -= 8 )
{
@@ -1831,7 +1835,7 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
inbuf += 8*BLOCKSIZE;
}
- aesni_cleanup_7_15();
+ aesni_cleanup_8_15();
}
#endif
@@ -1848,7 +1852,7 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
inbuf += BLOCKSIZE;
}
aesni_cleanup ();
- aesni_cleanup_2_6 ();
+ aesni_cleanup_2_7 ();
}
@@ -1876,10 +1880,10 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
unsigned char *outbuf, const unsigned char *inbuf,
size_t nblocks)
{
- aesni_prepare_2_6_variable;
+ aesni_prepare_2_7_variable;
aesni_prepare ();
- aesni_prepare_2_6();
+ aesni_prepare_2_7();
asm volatile ("movdqu %[iv], %%xmm6\n\t"
: /* No output */
@@ -1891,9 +1895,9 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
#ifdef __x86_64__
if (nblocks >= 8)
{
- aesni_prepare_7_15_variable;
+ aesni_prepare_8_15_variable;
- aesni_prepare_7_15();
+ aesni_prepare_8_15();
for ( ;nblocks >= 8; nblocks -= 8)
{
@@ -1953,7 +1957,7 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
inbuf += 8*BLOCKSIZE;
}
- aesni_cleanup_7_15();
+ aesni_cleanup_8_15();
}
#endif
@@ -2022,7 +2026,7 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
: "memory" );
aesni_cleanup ();
- aesni_cleanup_2_6 ();
+ aesni_cleanup_2_7 ();
}
@@ -2031,10 +2035,10 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
unsigned char *outbuf, const unsigned char *inbuf,
size_t nblocks)
{
- aesni_prepare_2_6_variable;
+ aesni_prepare_2_7_variable;
aesni_prepare ();
- aesni_prepare_2_6();
+ aesni_prepare_2_7();
if ( !ctx->decryption_prepared )
{
@@ -2051,9 +2055,9 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
#ifdef __x86_64__
if (nblocks >= 8)
{
- aesni_prepare_7_15_variable;
+ aesni_prepare_8_15_variable;
- aesni_prepare_7_15();
+ aesni_prepare_8_15();
for ( ;nblocks >= 8 ; nblocks -= 8 )
{
@@ -2113,7 +2117,7 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
inbuf += 8*BLOCKSIZE;
}
- aesni_cleanup_7_15();
+ aesni_cleanup_8_15();
}
#endif
@@ -2187,11 +2191,119 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
: "memory");
aesni_cleanup ();
- aesni_cleanup_2_6 ();
+ aesni_cleanup_2_7 ();
}
-static void
+static ALWAYS_INLINE void
+aesni_ocb_checksum (gcry_cipher_hd_t c, const unsigned char *plaintext,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+
+ /* Calculate checksum */
+ asm volatile ("movdqu %[checksum], %%xmm6\n\t"
+ "pxor %%xmm1, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm2\n\t"
+ "pxor %%xmm3, %%xmm3\n\t"
+ :
+ :[checksum] "m" (*c->u_ctr.ctr)
+ : "memory" );
+
+#if defined(HAVE_GCC_INLINE_ASM_AVX2)
+ if (nblocks >= 16 && ctx->use_avx2)
+ {
+ asm volatile ("vzeroupper\n\t"
+ "vpxor %%xmm0, %%xmm0, %%xmm0\n\t"
+ "vpxor %%xmm4, %%xmm4, %%xmm4\n\t"
+ "vpxor %%xmm5, %%xmm5, %%xmm5\n\t"
+ "vpxor %%xmm7, %%xmm7, %%xmm7\n\t"
+ :
+ :
+ : "memory");
+
+ for (;nblocks >= 16; nblocks -= 16)
+ {
+ asm volatile ("vpxor %[ptr0], %%ymm6, %%ymm6\n\t"
+ "vpxor %[ptr1], %%ymm1, %%ymm1\n\t"
+ "vpxor %[ptr2], %%ymm2, %%ymm2\n\t"
+ "vpxor %[ptr3], %%ymm3, %%ymm3\n\t"
+ "vpxor %[ptr4], %%ymm0, %%ymm0\n\t"
+ "vpxor %[ptr5], %%ymm4, %%ymm4\n\t"
+ "vpxor %[ptr6], %%ymm5, %%ymm5\n\t"
+ "vpxor %[ptr7], %%ymm7, %%ymm7\n\t"
+ :
+ : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)),
+ [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)),
+ [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)),
+ [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)),
+ [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
+ [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)),
+ [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)),
+ [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2))
+ : "memory" );
+ plaintext += BLOCKSIZE * 16;
+ }
+
+ asm volatile ("vpxor %%ymm0, %%ymm6, %%ymm6\n\t"
+ "vpxor %%ymm4, %%ymm1, %%ymm1\n\t"
+ "vpxor %%ymm5, %%ymm2, %%ymm2\n\t"
+ "vpxor %%ymm7, %%ymm3, %%ymm3\n\t"
+ "vextracti128 $1, %%ymm6, %%xmm0\n\t"
+ "vextracti128 $1, %%ymm1, %%xmm4\n\t"
+ "vextracti128 $1, %%ymm2, %%xmm5\n\t"
+ "vextracti128 $1, %%ymm3, %%xmm7\n\t"
+ "vpxor %%xmm0, %%xmm6, %%xmm6\n\t"
+ "vpxor %%xmm4, %%xmm1, %%xmm1\n\t"
+ "vpxor %%xmm5, %%xmm2, %%xmm2\n\t"
+ "vpxor %%xmm7, %%xmm3, %%xmm3\n\t"
+ "vzeroupper\n\t"
+ :
+ :
+ : "memory" );
+ }
+#endif
+
+ for (;nblocks >= 4; nblocks -= 4)
+ {
+ asm volatile ("movdqu %[ptr0], %%xmm0\n\t"
+ "movdqu %[ptr1], %%xmm4\n\t"
+ "movdqu %[ptr2], %%xmm5\n\t"
+ "movdqu %[ptr3], %%xmm7\n\t"
+ "pxor %%xmm0, %%xmm6\n\t"
+ "pxor %%xmm4, %%xmm1\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ "pxor %%xmm7, %%xmm3\n\t"
+ :
+ : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE)),
+ [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE)),
+ [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE)),
+ [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE))
+ : "memory" );
+ plaintext += BLOCKSIZE * 4;
+ }
+
+ for (;nblocks >= 1; nblocks -= 1)
+ {
+ asm volatile ("movdqu %[ptr0], %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm6\n\t"
+ :
+ : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE))
+ : "memory" );
+ plaintext += BLOCKSIZE;
+ }
+
+ asm volatile ("pxor %%xmm1, %%xmm6\n\t"
+ "pxor %%xmm2, %%xmm6\n\t"
+ "pxor %%xmm3, %%xmm6\n\t"
+ "movdqu %%xmm6, %[checksum]\n\t"
+ : [checksum] "=m" (*c->u_ctr.ctr)
+ :
+ : "memory" );
+}
+
+
+static unsigned int NO_INLINE
aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
const void *inbuf_arg, size_t nblocks)
{
@@ -2200,31 +2312,35 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
const unsigned char *inbuf = inbuf_arg;
u64 n = c->u_mode.ocb.data_nblocks;
const unsigned char *l;
- aesni_prepare_2_6_variable;
+ byte tempbuf[16 * 2 + 15];
+ byte *l0l1;
+ byte *l0l1l0;
+ aesni_prepare_2_7_variable;
aesni_prepare ();
- aesni_prepare_2_6 ();
+ aesni_prepare_2_7 ();
+
+ aesni_ocb_checksum (c, inbuf_arg, nblocks);
- /* Preload Offset and Checksum */
+ asm volatile ("" : "=r" (l0l1) : "0" (tempbuf) : "memory");
+ l0l1 = l0l1 + (-(uintptr_t)l0l1 & 15);
+ l0l1l0 = l0l1 + 16;
+
+ /* Preload Offset */
asm volatile ("movdqu %[iv], %%xmm5\n\t"
- "movdqu %[ctr], %%xmm6\n\t"
: /* No output */
- : [iv] "m" (*c->u_iv.iv),
- [ctr] "m" (*c->u_ctr.ctr)
+ : [iv] "m" (*c->u_iv.iv)
: "memory" );
-
for ( ;nblocks && n % 4; nblocks-- )
{
l = aes_ocb_get_l(c, ++n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- /* Checksum_i = Checksum_{i-1} xor P_i */
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
asm volatile ("movdqu %[l], %%xmm1\n\t"
"movdqu %[inbuf], %%xmm0\n\t"
"pxor %%xmm1, %%xmm5\n\t"
- "pxor %%xmm0, %%xmm6\n\t"
"pxor %%xmm5, %%xmm0\n\t"
:
: [l] "m" (*l),
@@ -2243,95 +2359,103 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
outbuf += BLOCKSIZE;
}
+ asm volatile ("movdqu %[l0], %%xmm6\n\t"
+ "movdqu %[l1], %%xmm0\n\t"
+ "pxor %%xmm6, %%xmm0\n\t"
+ "movdqa %%xmm0, %[l0l1]\n\t"
+ "pxor %%xmm6, %%xmm0\n\t"
+ "movdqa %%xmm0, %[l0l1l0]\n\t"
+ : [l0l1] "=m" (*l0l1),
+ [l0l1l0] "=m" (*l0l1l0)
+ : [l0] "m" (*c->u_mode.ocb.L[0]),
+ [l1] "m" (*c->u_mode.ocb.L[1])
+ : "memory" );
+
#ifdef __x86_64__
if (nblocks >= 8)
{
- aesni_prepare_7_15_variable;
-
- aesni_prepare_7_15();
+ aesni_prepare_8_15_variable;
- asm volatile ("movdqu %[l0], %%xmm7\n\t"
- :
- : [l0] "m" (*c->u_mode.ocb.L[0])
- : "memory" );
+ aesni_prepare_8_15();
for ( ;nblocks >= 8 ; nblocks -= 8 )
{
n += 4;
l = aes_ocb_get_l(c, n);
- /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- /* Checksum_i = Checksum_{i-1} xor P_i */
- /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
-
- asm volatile ("movdqu %[l1], %%xmm10\n\t"
- "movdqu %[inbuf0], %%xmm1\n\t"
- "pxor %%xmm7, %%xmm5\n\t"
- "pxor %%xmm1, %%xmm6\n\t"
- "pxor %%xmm5, %%xmm1\n\t"
- "movdqa %%xmm5, %%xmm12\n\t"
+ asm volatile ("movdqa %[l0l1], %%xmm10\n\t"
+ "movdqa %[l0l1l0], %%xmm11\n\t"
+ "movdqu %[l3], %%xmm15\n\t"
:
- : [l1] "m" (*c->u_mode.ocb.L[1]),
- [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+ : [l0l1] "m" (*l0l1),
+ [l0l1l0] "m" (*l0l1l0),
+ [l3] "m" (*l)
: "memory" );
- asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
- "pxor %%xmm10, %%xmm5\n\t"
- "pxor %%xmm2, %%xmm6\n\t"
- "pxor %%xmm5, %%xmm2\n\t"
- "movdqa %%xmm5, %%xmm13\n\t"
+
+ n += 4;
+ l = aes_ocb_get_l(c, n);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+
+ asm volatile ("movdqu %[inbuf0], %%xmm1\n\t"
+ "movdqu %[inbuf1], %%xmm2\n\t"
+ "movdqu %[inbuf2], %%xmm3\n\t"
:
- : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+ : [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)),
+ [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)),
+ [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
: "memory" );
- asm volatile ("movdqu %[inbuf2], %%xmm3\n\t"
- "pxor %%xmm7, %%xmm5\n\t"
- "pxor %%xmm3, %%xmm6\n\t"
- "pxor %%xmm5, %%xmm3\n\t"
- "movdqa %%xmm5, %%xmm14\n\t"
+ asm volatile ("movdqu %[inbuf3], %%xmm4\n\t"
+ "movdqu %[inbuf4], %%xmm8\n\t"
+ "movdqu %[inbuf5], %%xmm9\n\t"
:
- : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+ : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)),
+ [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)),
+ [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE))
: "memory" );
- asm volatile ("movdqu %[l3], %%xmm15\n\t"
- "movdqu %[inbuf3], %%xmm4\n\t"
+ asm volatile ("movdqa %%xmm6, %%xmm12\n\t"
+ "pxor %%xmm5, %%xmm12\n\t"
+ "pxor %%xmm12, %%xmm1\n\t"
+
+ "movdqa %%xmm10, %%xmm13\n\t"
+ "pxor %%xmm5, %%xmm13\n\t"
+ "pxor %%xmm13, %%xmm2\n\t"
+
+ "movdqa %%xmm11, %%xmm14\n\t"
+ "pxor %%xmm5, %%xmm14\n\t"
+ "pxor %%xmm14, %%xmm3\n\t"
+
+ "pxor %%xmm11, %%xmm5\n\t"
"pxor %%xmm15, %%xmm5\n\t"
- "pxor %%xmm4, %%xmm6\n\t"
"pxor %%xmm5, %%xmm4\n\t"
"movdqa %%xmm5, %%xmm15\n\t"
- :
- : [l3] "m" (*l),
- [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
- : "memory" );
- n += 4;
- l = aes_ocb_get_l(c, n);
-
- asm volatile ("movdqu %[inbuf4], %%xmm8\n\t"
- "pxor %%xmm7, %%xmm5\n\t"
- "pxor %%xmm8, %%xmm6\n\t"
- "pxor %%xmm5, %%xmm8\n\t"
- "movdqu %%xmm5, %[outbuf4]\n\t"
- : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE))
- : [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE))
- : "memory" );
- asm volatile ("movdqu %[inbuf5], %%xmm9\n\t"
- "pxor %%xmm10, %%xmm5\n\t"
- "pxor %%xmm9, %%xmm6\n\t"
- "pxor %%xmm5, %%xmm9\n\t"
- "movdqu %%xmm5, %[outbuf5]\n\t"
- : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE))
- : [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE))
+ "movdqa %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm6, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm8\n\t"
+ "movdqu %%xmm0, %[outbuf4]\n\t"
+
+ "movdqa %%xmm10, %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm9\n\t"
+ "movdqu %%xmm0, %[outbuf5]\n\t"
+ : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)),
+ [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE))
+ :
: "memory" );
asm volatile ("movdqu %[inbuf6], %%xmm10\n\t"
- "pxor %%xmm7, %%xmm5\n\t"
- "pxor %%xmm10, %%xmm6\n\t"
- "pxor %%xmm5, %%xmm10\n\t"
- "movdqu %%xmm5, %[outbuf6]\n\t"
+ "movdqa %%xmm11, %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm10\n\t"
+ "movdqu %%xmm0, %[outbuf6]\n\t"
: [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE))
: [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE))
: "memory" );
- asm volatile ("movdqu %[l7], %%xmm11\n\t"
+ asm volatile ("movdqu %[l7], %%xmm0\n\t"
"pxor %%xmm11, %%xmm5\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
"movdqu %[inbuf7], %%xmm11\n\t"
- "pxor %%xmm11, %%xmm6\n\t"
"pxor %%xmm5, %%xmm11\n\t"
:
: [l7] "m" (*l),
@@ -2374,7 +2498,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
inbuf += 8*BLOCKSIZE;
}
- aesni_cleanup_7_15();
+ aesni_cleanup_8_15();
}
#endif
@@ -2384,44 +2508,47 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
l = aes_ocb_get_l(c, n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- /* Checksum_i = Checksum_{i-1} xor P_i */
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
- asm volatile ("movdqu %[l0], %%xmm4\n\t"
+
+ asm volatile ("movdqu %[l0], %%xmm0\n\t"
"movdqu %[inbuf0], %%xmm1\n\t"
- "pxor %%xmm4, %%xmm5\n\t"
- "pxor %%xmm1, %%xmm6\n\t"
- "pxor %%xmm5, %%xmm1\n\t"
- "movdqu %%xmm5, %[outbuf0]\n\t"
- : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+ "movdqa %[l0l1], %%xmm3\n\t"
+ :
: [l0] "m" (*c->u_mode.ocb.L[0]),
+ [l0l1] "m" (*l0l1),
[inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
: "memory" );
- asm volatile ("movdqu %[l1], %%xmm0\n\t"
- "movdqu %[inbuf1], %%xmm2\n\t"
- "pxor %%xmm0, %%xmm5\n\t"
- "pxor %%xmm2, %%xmm6\n\t"
- "pxor %%xmm5, %%xmm2\n\t"
- "movdqu %%xmm5, %[outbuf1]\n\t"
- : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
- : [l1] "m" (*c->u_mode.ocb.L[1]),
- [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+ asm volatile ("movdqa %[l0l1l0], %%xmm4\n\t"
+ "movdqu %[l3], %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm1\n\t"
+ "movdqu %%xmm0, %[outbuf0]\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+ : [l0l1l0] "m" (*l0l1l0),
+ [l3] "m" (*l)
: "memory" );
- asm volatile ("movdqu %[inbuf2], %%xmm3\n\t"
- "pxor %%xmm4, %%xmm5\n\t"
- "pxor %%xmm3, %%xmm6\n\t"
+ asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
"pxor %%xmm5, %%xmm3\n\t"
- "movdqu %%xmm5, %[outbuf2]\n\t"
+ "pxor %%xmm3, %%xmm2\n\t"
+ "movdqu %%xmm3, %[outbuf1]\n\t"
+ : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+ : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+ "movdqu %[inbuf2], %%xmm3\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm3\n\t"
+ "movdqu %%xmm0, %[outbuf2]\n\t"
: [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
- : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+ :
+ [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
: "memory" );
- asm volatile ("movdqu %[l3], %%xmm4\n\t"
+ asm volatile ("pxor %%xmm6, %%xmm5\n\t"
"pxor %%xmm4, %%xmm5\n\t"
"movdqu %[inbuf3], %%xmm4\n\t"
- "pxor %%xmm4, %%xmm6\n\t"
"pxor %%xmm5, %%xmm4\n\t"
:
- : [l3] "m" (*l),
- [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+ : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
: "memory" );
do_aesni_enc_vec4 (ctx);
@@ -2453,12 +2580,10 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
l = aes_ocb_get_l(c, ++n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- /* Checksum_i = Checksum_{i-1} xor P_i */
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
asm volatile ("movdqu %[l], %%xmm1\n\t"
"movdqu %[inbuf], %%xmm0\n\t"
"pxor %%xmm1, %%xmm5\n\t"
- "pxor %%xmm0, %%xmm6\n\t"
"pxor %%xmm5, %%xmm0\n\t"
:
: [l] "m" (*l),
@@ -2479,30 +2604,41 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
c->u_mode.ocb.data_nblocks = n;
asm volatile ("movdqu %%xmm5, %[iv]\n\t"
- "movdqu %%xmm6, %[ctr]\n\t"
- : [iv] "=m" (*c->u_iv.iv),
- [ctr] "=m" (*c->u_ctr.ctr)
+ : [iv] "=m" (*c->u_iv.iv)
:
: "memory" );
+ asm volatile ("pxor %%xmm0, %%xmm0\n\t"
+ "movdqa %%xmm0, %[l0l1]\n\t"
+ "movdqa %%xmm0, %[l0l1l0]\n\t"
+ : [l0l1] "=m" (*l0l1),
+ [l0l1l0] "=m" (*l0l1l0)
+ :
+ : "memory" );
aesni_cleanup ();
- aesni_cleanup_2_6 ();
+ aesni_cleanup_2_7 ();
+
+ return 0;
}
-static void
+static unsigned int NO_INLINE
aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
- const void *inbuf_arg, size_t nblocks)
+ const void *inbuf_arg, size_t nblocks_arg)
{
RIJNDAEL_context *ctx = (void *)&c->context.c;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
u64 n = c->u_mode.ocb.data_nblocks;
const unsigned char *l;
- aesni_prepare_2_6_variable;
+ size_t nblocks = nblocks_arg;
+ byte tempbuf[16 * 2 + 15];
+ byte *l0l1;
+ byte *l0l1l0;
+ aesni_prepare_2_7_variable;
aesni_prepare ();
- aesni_prepare_2_6 ();
+ aesni_prepare_2_7 ();
if ( !ctx->decryption_prepared )
{
@@ -2510,12 +2646,14 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
ctx->decryption_prepared = 1;
}
- /* Preload Offset and Checksum */
+ asm volatile ("" : "=r" (l0l1) : "0" (tempbuf) : "memory");
+ l0l1 = l0l1 + (-(uintptr_t)l0l1 & 15);
+ l0l1l0 = l0l1 + 16;
+
+ /* Preload Offset */
asm volatile ("movdqu %[iv], %%xmm5\n\t"
- "movdqu %[ctr], %%xmm6\n\t"
: /* No output */
- : [iv] "m" (*c->u_iv.iv),
- [ctr] "m" (*c->u_ctr.ctr)
+ : [iv] "m" (*c->u_iv.iv)
: "memory" );
for ( ;nblocks && n % 4; nblocks-- )
@@ -2524,7 +2662,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
- /* Checksum_i = Checksum_{i-1} xor P_i */
asm volatile ("movdqu %[l], %%xmm1\n\t"
"movdqu %[inbuf], %%xmm0\n\t"
"pxor %%xmm1, %%xmm5\n\t"
@@ -2537,7 +2674,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
do_aesni_dec (ctx);
asm volatile ("pxor %%xmm5, %%xmm0\n\t"
- "pxor %%xmm0, %%xmm6\n\t"
"movdqu %%xmm0, %[outbuf]\n\t"
: [outbuf] "=m" (*outbuf)
:
@@ -2547,87 +2683,103 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
outbuf += BLOCKSIZE;
}
+ asm volatile ("movdqu %[l0], %%xmm6\n\t"
+ "movdqu %[l1], %%xmm0\n\t"
+ "pxor %%xmm6, %%xmm0\n\t"
+ "movdqa %%xmm0, %[l0l1]\n\t"
+ "pxor %%xmm6, %%xmm0\n\t"
+ "movdqa %%xmm0, %[l0l1l0]\n\t"
+ : [l0l1] "=m" (*l0l1),
+ [l0l1l0] "=m" (*l0l1l0)
+ : [l0] "m" (*c->u_mode.ocb.L[0]),
+ [l1] "m" (*c->u_mode.ocb.L[1])
+ : "memory" );
+
#ifdef __x86_64__
if (nblocks >= 8)
{
- aesni_prepare_7_15_variable;
-
- aesni_prepare_7_15();
+ aesni_prepare_8_15_variable;
- asm volatile ("movdqu %[l0], %%xmm7\n\t"
- :
- : [l0] "m" (*c->u_mode.ocb.L[0])
- : "memory" );
+ aesni_prepare_8_15();
for ( ;nblocks >= 8 ; nblocks -= 8 )
{
n += 4;
l = aes_ocb_get_l(c, n);
+ asm volatile ("movdqa %[l0l1], %%xmm10\n\t"
+ "movdqa %[l0l1l0], %%xmm11\n\t"
+ "movdqu %[l3], %%xmm15\n\t"
+ :
+ : [l0l1] "m" (*l0l1),
+ [l0l1l0] "m" (*l0l1l0),
+ [l3] "m" (*l)
+ : "memory" );
+
+ n += 4;
+ l = aes_ocb_get_l(c, n);
+
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
- /* Checksum_i = Checksum_{i-1} xor P_i */
- asm volatile ("movdqu %[l1], %%xmm10\n\t"
- "movdqu %[inbuf0], %%xmm1\n\t"
- "pxor %%xmm7, %%xmm5\n\t"
- "pxor %%xmm5, %%xmm1\n\t"
- "movdqa %%xmm5, %%xmm12\n\t"
+ asm volatile ("movdqu %[inbuf0], %%xmm1\n\t"
+ "movdqu %[inbuf1], %%xmm2\n\t"
+ "movdqu %[inbuf2], %%xmm3\n\t"
:
- : [l1] "m" (*c->u_mode.ocb.L[1]),
- [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+ : [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)),
+ [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)),
+ [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
: "memory" );
- asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
- "pxor %%xmm10, %%xmm5\n\t"
- "pxor %%xmm5, %%xmm2\n\t"
- "movdqa %%xmm5, %%xmm13\n\t"
- :
- : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
- : "memory" );
- asm volatile ("movdqu %[inbuf2], %%xmm3\n\t"
- "pxor %%xmm7, %%xmm5\n\t"
- "pxor %%xmm5, %%xmm3\n\t"
- "movdqa %%xmm5, %%xmm14\n\t"
+ asm volatile ("movdqu %[inbuf3], %%xmm4\n\t"
+ "movdqu %[inbuf4], %%xmm8\n\t"
+ "movdqu %[inbuf5], %%xmm9\n\t"
:
- : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+ : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)),
+ [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)),
+ [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE))
: "memory" );
- asm volatile ("movdqu %[l3], %%xmm0\n\t"
- "movdqu %[inbuf3], %%xmm4\n\t"
- "pxor %%xmm0, %%xmm5\n\t"
+ asm volatile ("movdqa %%xmm6, %%xmm12\n\t"
+ "pxor %%xmm5, %%xmm12\n\t"
+ "pxor %%xmm12, %%xmm1\n\t"
+
+ "movdqa %%xmm10, %%xmm13\n\t"
+ "pxor %%xmm5, %%xmm13\n\t"
+ "pxor %%xmm13, %%xmm2\n\t"
+
+ "movdqa %%xmm11, %%xmm14\n\t"
+ "pxor %%xmm5, %%xmm14\n\t"
+ "pxor %%xmm14, %%xmm3\n\t"
+
+ "pxor %%xmm11, %%xmm5\n\t"
+ "pxor %%xmm15, %%xmm5\n\t"
"pxor %%xmm5, %%xmm4\n\t"
"movdqa %%xmm5, %%xmm15\n\t"
- :
- : [l3] "m" (*l),
- [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
- : "memory" );
- n += 4;
- l = aes_ocb_get_l(c, n);
-
- asm volatile ("movdqu %[inbuf4], %%xmm8\n\t"
- "pxor %%xmm7, %%xmm5\n\t"
- "pxor %%xmm5, %%xmm8\n\t"
- "movdqu %%xmm5, %[outbuf4]\n\t"
- : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE))
- : [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE))
- : "memory" );
- asm volatile ("movdqu %[inbuf5], %%xmm9\n\t"
- "pxor %%xmm10, %%xmm5\n\t"
- "pxor %%xmm5, %%xmm9\n\t"
- "movdqu %%xmm5, %[outbuf5]\n\t"
- : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE))
- : [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE))
+ "movdqa %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm6, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm8\n\t"
+ "movdqu %%xmm0, %[outbuf4]\n\t"
+
+ "movdqa %%xmm10, %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm9\n\t"
+ "movdqu %%xmm0, %[outbuf5]\n\t"
+ : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)),
+ [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE))
+ :
: "memory" );
asm volatile ("movdqu %[inbuf6], %%xmm10\n\t"
- "pxor %%xmm7, %%xmm5\n\t"
- "pxor %%xmm5, %%xmm10\n\t"
- "movdqu %%xmm5, %[outbuf6]\n\t"
+ "movdqa %%xmm11, %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm10\n\t"
+ "movdqu %%xmm0, %[outbuf6]\n\t"
: [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE))
: [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE))
: "memory" );
asm volatile ("movdqu %[l7], %%xmm0\n\t"
- "movdqu %[inbuf7], %%xmm11\n\t"
+ "pxor %%xmm11, %%xmm5\n\t"
"pxor %%xmm0, %%xmm5\n\t"
+ "movdqu %[inbuf7], %%xmm11\n\t"
"pxor %%xmm5, %%xmm11\n\t"
:
: [l7] "m" (*l),
@@ -2655,14 +2807,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
"movdqu %%xmm9, %[outbuf5]\n\t"
"movdqu %%xmm10, %[outbuf6]\n\t"
"movdqu %%xmm11, %[outbuf7]\n\t"
- "pxor %%xmm2, %%xmm1\n\t"
- "pxor %%xmm4, %%xmm1\n\t"
- "pxor %%xmm9, %%xmm1\n\t"
- "pxor %%xmm11, %%xmm1\n\t"
- "pxor %%xmm3, %%xmm6\n\t"
- "pxor %%xmm8, %%xmm6\n\t"
- "pxor %%xmm10, %%xmm6\n\t"
- "pxor %%xmm1, %%xmm6\n\t"
: [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
[outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)),
[outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
@@ -2678,7 +2822,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
inbuf += 8*BLOCKSIZE;
}
- aesni_cleanup_7_15();
+ aesni_cleanup_8_15();
}
#endif
@@ -2688,40 +2832,47 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
l = aes_ocb_get_l(c, n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
- /* Checksum_i = Checksum_{i-1} xor P_i */
- asm volatile ("movdqu %[l0], %%xmm4\n\t"
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+
+ asm volatile ("movdqu %[l0], %%xmm0\n\t"
"movdqu %[inbuf0], %%xmm1\n\t"
- "pxor %%xmm4, %%xmm5\n\t"
- "pxor %%xmm5, %%xmm1\n\t"
- "movdqu %%xmm5, %[outbuf0]\n\t"
- : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+ "movdqa %[l0l1], %%xmm3\n\t"
+ :
: [l0] "m" (*c->u_mode.ocb.L[0]),
+ [l0l1] "m" (*l0l1),
[inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
: "memory" );
- asm volatile ("movdqu %[l1], %%xmm0\n\t"
- "movdqu %[inbuf1], %%xmm2\n\t"
- "pxor %%xmm0, %%xmm5\n\t"
- "pxor %%xmm5, %%xmm2\n\t"
- "movdqu %%xmm5, %[outbuf1]\n\t"
- : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
- : [l1] "m" (*c->u_mode.ocb.L[1]),
- [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+ asm volatile ("movdqa %[l0l1l0], %%xmm4\n\t"
+ "movdqu %[l3], %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm1\n\t"
+ "movdqu %%xmm0, %[outbuf0]\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+ : [l0l1l0] "m" (*l0l1l0),
+ [l3] "m" (*l)
: "memory" );
- asm volatile ("movdqu %[inbuf2], %%xmm3\n\t"
- "pxor %%xmm4, %%xmm5\n\t"
+ asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
"pxor %%xmm5, %%xmm3\n\t"
- "movdqu %%xmm5, %[outbuf2]\n\t"
+ "pxor %%xmm3, %%xmm2\n\t"
+ "movdqu %%xmm3, %[outbuf1]\n\t"
+ : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+ : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+ "movdqu %[inbuf2], %%xmm3\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm3\n\t"
+ "movdqu %%xmm0, %[outbuf2]\n\t"
: [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
- : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+ :
+ [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
: "memory" );
- asm volatile ("movdqu %[l3], %%xmm0\n\t"
+ asm volatile ("pxor %%xmm6, %%xmm5\n\t"
+ "pxor %%xmm4, %%xmm5\n\t"
"movdqu %[inbuf3], %%xmm4\n\t"
- "pxor %%xmm0, %%xmm5\n\t"
"pxor %%xmm5, %%xmm4\n\t"
:
- : [l3] "m" (*l),
- [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+ : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
: "memory" );
do_aesni_dec_vec4 (ctx);
@@ -2737,10 +2888,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
"movdqu %%xmm3, %[outbuf2]\n\t"
"pxor %%xmm5, %%xmm4\n\t"
"movdqu %%xmm4, %[outbuf3]\n\t"
- "pxor %%xmm1, %%xmm6\n\t"
- "pxor %%xmm2, %%xmm6\n\t"
- "pxor %%xmm3, %%xmm6\n\t"
- "pxor %%xmm4, %%xmm6\n\t"
: [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
[outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
[outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
@@ -2771,7 +2918,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
do_aesni_dec (ctx);
asm volatile ("pxor %%xmm5, %%xmm0\n\t"
- "pxor %%xmm0, %%xmm6\n\t"
"movdqu %%xmm0, %[outbuf]\n\t"
: [outbuf] "=m" (*outbuf)
:
@@ -2783,14 +2929,23 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
c->u_mode.ocb.data_nblocks = n;
asm volatile ("movdqu %%xmm5, %[iv]\n\t"
- "movdqu %%xmm6, %[ctr]\n\t"
- : [iv] "=m" (*c->u_iv.iv),
- [ctr] "=m" (*c->u_ctr.ctr)
+ : [iv] "=m" (*c->u_iv.iv)
:
: "memory" );
+ aesni_ocb_checksum (c, outbuf_arg, nblocks_arg);
+
+ asm volatile ("pxor %%xmm0, %%xmm0\n\t"
+ "movdqa %%xmm0, %[l0l1]\n\t"
+ "movdqa %%xmm0, %[l0l1l0]\n\t"
+ : [l0l1] "=m" (*l0l1),
+ [l0l1l0] "=m" (*l0l1l0)
+ :
+ : "memory" );
aesni_cleanup ();
- aesni_cleanup_2_6 ();
+ aesni_cleanup_2_7 ();
+
+ return 0;
}
@@ -2799,11 +2954,9 @@ _gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
const void *inbuf_arg, size_t nblocks, int encrypt)
{
if (encrypt)
- aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks);
+ return aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks);
else
- aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks);
-
- return 0;
+ return aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks);
}
@@ -2815,10 +2968,10 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
const unsigned char *abuf = abuf_arg;
u64 n = c->u_mode.ocb.aad_nblocks;
const unsigned char *l;
- aesni_prepare_2_6_variable;
+ aesni_prepare_2_7_variable;
aesni_prepare ();
- aesni_prepare_2_6 ();
+ aesni_prepare_2_7 ();
/* Preload Offset and Sum */
asm volatile ("movdqu %[iv], %%xmm5\n\t"
@@ -2856,9 +3009,9 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
#ifdef __x86_64__
if (nblocks >= 8)
{
- aesni_prepare_7_15_variable;
+ aesni_prepare_8_15_variable;
- aesni_prepare_7_15();
+ aesni_prepare_8_15();
asm volatile ("movdqu %[l0], %%xmm7\n\t"
"movdqu %[l1], %%xmm12\n\t"
@@ -2948,7 +3101,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
abuf += 8*BLOCKSIZE;
}
- aesni_cleanup_7_15();
+ aesni_cleanup_8_15();
}
#endif
@@ -3038,7 +3191,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
: "memory" );
aesni_cleanup ();
- aesni_cleanup_2_6 ();
+ aesni_cleanup_2_7 ();
return 0;
}
@@ -3053,10 +3206,10 @@ _gcry_aes_aesni_xts_enc (RIJNDAEL_context *ctx, unsigned char *tweak,
unsigned char *outbuf, const unsigned char *inbuf,
size_t nblocks)
{
- aesni_prepare_2_6_variable;
+ aesni_prepare_2_7_variable;
aesni_prepare ();
- aesni_prepare_2_6 ();
+ aesni_prepare_2_7 ();
/* Preload Tweak */
asm volatile ("movdqu %[tweak], %%xmm5\n\t"
@@ -3182,7 +3335,7 @@ _gcry_aes_aesni_xts_enc (RIJNDAEL_context *ctx, unsigned char *tweak,
: "memory" );
aesni_cleanup ();
- aesni_cleanup_2_6 ();
+ aesni_cleanup_2_7 ();
}
@@ -3191,10 +3344,10 @@ _gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak,
unsigned char *outbuf, const unsigned char *inbuf,
size_t nblocks)
{
- aesni_prepare_2_6_variable;
+ aesni_prepare_2_7_variable;
aesni_prepare ();
- aesni_prepare_2_6 ();
+ aesni_prepare_2_7 ();
if ( !ctx->decryption_prepared )
{
@@ -3326,7 +3479,7 @@ _gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak,
: "memory" );
aesni_cleanup ();
- aesni_cleanup_2_6 ();
+ aesni_cleanup_2_7 ();
}
diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h
index 160fb8c36..1dcfcd5e4 100644
--- a/cipher/rijndael-internal.h
+++ b/cipher/rijndael-internal.h
@@ -143,6 +143,7 @@ typedef struct RIJNDAEL_context_s
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
unsigned int use_aesni:1; /* AES-NI shall be used. */
+ unsigned int use_avx2:1; /* AVX2 shall be used. */
#endif /*USE_AESNI*/
#ifdef USE_SSSE3
unsigned int use_ssse3:1; /* SSSE3 shall be used. */
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 1bc8b0fc2..e8ec7993b 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -334,6 +334,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
ctx->prefetch_enc_fn = NULL;
ctx->prefetch_dec_fn = NULL;
ctx->use_aesni = 1;
+ ctx->use_avx2 = !!(hwfeatures & HWF_INTEL_AVX2);
if (hd)
{
hd->bulk.cfb_enc = _gcry_aes_aesni_cfb_enc;
diff --git a/tests/basic.c b/tests/basic.c
index f3d895153..0afae3047 100644
--- a/tests/basic.c
+++ b/tests/basic.c
@@ -4411,11 +4411,114 @@ do_check_ocb_cipher (int inplace)
"1792A4E31E0755FB03E31B22116E6C2DDF9EFD6E33D536F1"
"A0124B0A55BAE884ED93481529C76B6AD0C515F4D1CDD4FD"
"AC4F02AA"
+ },
+ { GCRY_CIPHER_AES, 12, "0F0E0D0C0B0A09080706050403020100",
+ "BBAA9988776655443322110D",
+ "000102030405060708090A0B0C0D0E0F1011121314151617"
+ "18191A1B1C1D1E1F2021222324252627",
+ /* test vector for checksumming */
+ "01000000000000000000000000000000"
+ "02000000000000000000000000000000"
+ "04000000000000000000000000000000"
+ "08000000000000000000000000000000"
+ "10000000000000000000000000000000"
+ "20000000000000000000000000000000"
+ "40000000000000000000000000000000"
+ "80000000000000000000000000000000"
+ "00010000000000000000000000000000"
+ "00020000000000000000000000000000"
+ "00040000000000000000000000000000"
+ "00080000000000000000000000000000"
+ "00100000000000000000000000000000"
+ "00200000000000000000000000000000"
+ "00400000000000000000000000000000"
+ "00800000000000000000000000000000"
+ "00000100000000000000000000000000"
+ "00000200000000000000000000000000"
+ "00000400000000000000000000000000"
+ "00000800000000000000000000000000"
+ "00001000000000000000000000000000"
+ "00002000000000000000000000000000"
+ "00004000000000000000000000000000"
+ "00008000000000000000000000000000"
+ "00000001000000000000000000000000"
+ "00000002000000000000000000000000"
+ "00000004000000000000000000000000"
+ "00000008000000000000000000000000"
+ "00000010000000000000000000000000"
+ "00000020000000000000000000000000"
+ "00000040000000000000000000000000"
+ "00000080000000000000000000000000"
+ "00000000010000000000000000000000"
+ "00000000020000000000000000000000"
+ "00000000040000000000000000000000"
+ "00000000080000000000000000000000"
+ "00000000100000000000000000000000"
+ "00000000200000000000000000000000"
+ "00000000400000000000000000000000"
+ "00000000800000000000000000000000"
+ "00000000000100000000000000000000"
+ "00000000000200000000000000000000"
+ "00000000000400000000000000000000"
+ "00000000000800000000000000000000"
+ "00000000001000000000000000000000"
+ "00000000002000000000000000000000"
+ "00000000004000000000000000000000"
+ "00000000008000000000000000000000",
+ "01105c6e36f6ac480f022c51e31ed702"
+ "90fda4b7b783194d4b4be8e4e1e2dff4"
+ "6a0804d1c5f9f808ea7933e31c063233"
+ "2bf65a22b20bb13cde3b80b3682ba965"
+ "b1207c58916f7856fa9968b410e50dee"
+ "98b35c071163d1b352b9bbccd09fde29"
+ "b850f40e71a8ae7d2e2d577f5ee39c46"
+ "7fa28130b50a123c29958e4665dda9a5"
+ "e0793997f8f19633a96392141d6e0e88"
+ "77850ed4364065d1d2f8746e2f1d5fd1"
+ "996cdde03215306503a30e41f58ef3c4"
+ "400365cfea4fa6381157c12a46598edf"
+ "18604854462ec66e3d3cf26d4723cb6a"
+ "9d801095048086a606fdb9192760889b"
+ "a8ce2e70e1b55a469137a9e2e6734565"
+ "283cb1e2c74f37e0854d03e33f8ba499"
+ "ef5d9af4edfce077c6280338f0a64286"
+ "2e6bc27ebd5a4c91b3778e22631251c8"
+ "c5bb75a10945597a9d6c274fc82d3338"
+ "b403a0a549d1375f26e71ef22bce0941"
+ "93ea87e2ed72fce0546148c351eec3be"
+ "867bb1b96070c377fff3c98e21562beb"
+ "475cfe28abcaaedf49981f6599b15140"
+ "ea6130d24407079f18ba9d4a8960b082"
+ "b39c57320e2e064f02fde88c23112146"
+ "1cac3655868aef584714826ee4f361fb"
+ "e6d692e1589cbb9dd3c74fa628df2a1f"
+ "3b0029b1d62b7e9978013ed3c793c1dd"
+ "1f184c8f7022a853cac40b74ac749aa3"
+ "f33f0d14732dfda0f2c3c20591bf1f5a"
+ "710ec0d0bca342baa5146068a78ff58c"
+ "66316312b7a98af35a0f4e92799b4047"
+ "f047ae61f25c28d232ce5c168cc745d6"
+ "6da13cb0f9e38a696635dba7a21571cf"
+ "cd64ec8cc33db7879f59a90d9edd00f6"
+ "a899e39ab36b9269a3ac04ebad9326bf"
+ "53cd9b400168a61714cd628a4056d236"
+ "bd8622c76daa54cb65f5db2fe03bafbe"
+ "0b23549ae31136f607293e8093a21934"
+ "74fd5e9c2451b4c8e0499e6ad34fafc8"
+ "ab77722a282f7f84b14ddebf7e696300"
+ "c1ef92d4a0263c6cca104530f996e272"
+ "f58992ff68d642b071a5848dc4acf2ae"
+ "28fb1f27ae0f297d5136a7a0a4a03e89"
+ "b588755b8217a1c62773790e69261269"
+ "19f45daf7b3ccf18e3fc590a9a0e172f"
+ "033ac4d13c3decc4c62d7de718ace802"
+ "140452dc850989f6762e3578bbb04be3"
+ "1a237c599c4649f4e586b2de"
}
};
gpg_error_t err = 0;
gcry_cipher_hd_t hde, hdd;
- unsigned char out[MAX_DATA_LEN];
+ unsigned char out[1024];
unsigned char tag[16];
int tidx;
@@ -4548,7 +4651,7 @@ do_check_ocb_cipher (int inplace)
}
else
{
- err = gcry_cipher_encrypt (hde, out, MAX_DATA_LEN,
+ err = gcry_cipher_encrypt (hde, out, sizeof(out),
plain, plainlen);
}
}
@@ -4605,7 +4708,7 @@ do_check_ocb_cipher (int inplace)
}
else
{
- unsigned char tmp[MAX_DATA_LEN];
+ unsigned char tmp[sizeof(out)];
memcpy(tmp, out, plainlen);
err = gcry_cipher_decrypt (hdd, out, plainlen, tmp, plainlen);
@@ -4696,7 +4799,7 @@ check_ocb_cipher_largebuf_split (int algo, int keylen, const char *tagexpect,
}
for (i = 0; i < buflen; i++)
- inbuf[i] = 'a';
+ inbuf[i] = (i + 181081) * 5039;
err = gcry_cipher_open (&hde, algo, GCRY_CIPHER_MODE_OCB, 0);
if (!err)
@@ -4854,6 +4957,131 @@ out_free:
}
+static void
+check_ocb_cipher_checksum (int algo, int keylen)
+{
+ static const unsigned char key[32] =
+ "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F";
+ static const unsigned char nonce[12] =
+ "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x00\x01\x02\x03";
+ const size_t buflen = 128 * 16;
+ unsigned char *inbuf, *outbuf;
+ gpg_error_t err = 0;
+ gcry_cipher_hd_t hde, hde2;
+ unsigned char tag[16];
+ unsigned char tag2[16];
+ int i;
+
+ inbuf = xmalloc(buflen);
+ if (!inbuf)
+ {
+ fail ("out-of-memory\n");
+ return;
+ }
+ outbuf = xmalloc(buflen);
+ if (!outbuf)
+ {
+ fail ("out-of-memory\n");
+ xfree(inbuf);
+ return;
+ }
+
+ memset(inbuf, 0, buflen);
+ for (i = 0; i < buflen; i += 16)
+ {
+ unsigned char *blk = inbuf + i;
+ int bit2set = i / 16;
+ int byteidx = bit2set / 8;
+ int bitpos = bit2set % 8;
+
+ blk[byteidx] |= 1 << bitpos;
+ }
+
+ err = gcry_cipher_open (&hde, algo, GCRY_CIPHER_MODE_OCB, 0);
+ if (!err)
+ err = gcry_cipher_open (&hde2, algo, GCRY_CIPHER_MODE_OCB, 0);
+ if (err)
+ {
+ fail ("cipher-ocb, gcry_cipher_open failed (checksum, algo %d): %s\n",
+ algo, gpg_strerror (err));
+ goto out_free;
+ }
+
+ err = gcry_cipher_setkey (hde, key, keylen);
+ if (!err)
+ err = gcry_cipher_setkey (hde2, key, keylen);
+ if (err)
+ {
+ fail ("cipher-ocb, gcry_cipher_setkey failed (checksum, algo %d): %s\n",
+ algo, gpg_strerror (err));
+ gcry_cipher_close (hde);
+ gcry_cipher_close (hde2);
+ goto out_free;
+ }
+
+ err = gcry_cipher_setiv (hde, nonce, 12);
+ if (!err)
+ err = gcry_cipher_setiv (hde2, nonce, 12);
+ if (err)
+ {
+ fail ("cipher-ocb, gcry_cipher_setiv failed (checksum, algo %d): %s\n",
+ algo, gpg_strerror (err));
+ gcry_cipher_close (hde);
+ gcry_cipher_close (hde2);
+ goto out_free;
+ }
+
+ err = gcry_cipher_final (hde);
+ if (!err)
+ {
+ err = gcry_cipher_encrypt (hde, outbuf, buflen, inbuf, buflen);
+ }
+ for (i = 0; i < buflen && !err; i += 16)
+ {
+ if (i + 16 == buflen)
+ err = gcry_cipher_final (hde2);
+ if (!err)
+ err = gcry_cipher_encrypt (hde2, outbuf + i, 16, inbuf + i, 16);
+ }
+
+ if (err)
+ {
+ fail ("cipher-ocb, gcry_cipher_encrypt failed (checksum, algo %d): %s\n",
+ algo, gpg_strerror (err));
+ gcry_cipher_close (hde);
+ gcry_cipher_close (hde2);
+ goto out_free;
+ }
+
+ /* Check that the tag matches. */
+ err = gcry_cipher_gettag (hde, tag, 16);
+ if (err)
+ {
+ fail ("cipher_ocb, gcry_cipher_gettag failed (checksum, algo %d): %s\n",
+ algo, gpg_strerror (err));
+ }
+ err = gcry_cipher_gettag (hde2, tag2, 16);
+ if (err)
+ {
+ fail ("cipher_ocb, gcry_cipher_gettag failed (checksum2, algo %d): %s\n",
+ algo, gpg_strerror (err));
+ }
+ if (memcmp (tag, tag2, 16))
+ {
+ mismatch (tag, 16, tag2, 16);
+ fail ("cipher-ocb, encrypt tag mismatch (checksum, algo %d)\n", algo);
+ }
+
+ gcry_cipher_close (hde);
+ gcry_cipher_close (hde2);
+
+out_free:
+ xfree(inbuf);
+ xfree(outbuf);
+}
+
+
static void
check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
{
@@ -4863,6 +5091,8 @@ check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
{
check_ocb_cipher_largebuf_split(algo, keylen, tagexpect, split);
}
+
+ check_ocb_cipher_checksum(algo, keylen);
}
@@ -5108,35 +5338,25 @@ check_ocb_cipher (void)
/* Check large buffer encryption/decryption. */
check_ocb_cipher_largebuf(GCRY_CIPHER_AES, 16,
- "\xf5\xf3\x12\x7d\x58\x2d\x96\xe8"
- "\x33\xfd\x7a\x4f\x42\x60\x5d\x20");
+ "\xc1\x5b\xf1\x80\xa4\xd5\xea\xfd\xae\x17\xa6\xcd\x6b\x10\xa8\xea");
check_ocb_cipher_largebuf(GCRY_CIPHER_AES256, 32,
- "\xfa\x26\xa5\xbf\xf6\x7d\x3a\x8d"
- "\xfe\x96\x67\xc9\xc8\x41\x03\x51");
+ "\x2b\xb7\x25\x6b\x77\xc7\xfb\x21\x5c\xc9\x6c\x36\x17\x1a\x1a\xd5");
check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA128, 16,
- "\x28\x23\x38\x45\x2b\xfd\x42\x45"
- "\x43\x64\x7e\x67\x7f\xf4\x8b\xcd");
+ "\xe0\xae\x3f\x29\x3a\xee\xd8\xe3\xf2\x20\xc1\xa2\xd8\x72\x12\xd9");
check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA192, 24,
- "\xee\xca\xe5\x39\x27\x2d\x33\xe7"
- "\x79\x74\xb0\x1d\x37\x12\xd5\x6c");
+ "\xd7\x98\x71\xcf\x19\x5c\xa3\x3d\x6c\xfc\xc9\xbe\x9f\x13\x6b\xbd");
check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA256, 32,
- "\x39\x39\xd0\x2d\x05\x68\x74\xee"
- "\x18\x6b\xea\x3d\x0b\xd3\x58\xae");
+ "\x03\xf6\xec\x1a\x0e\xae\x66\x24\x2b\xba\x26\x0f\xb3\xb3\x1f\xb9");
check_ocb_cipher_largebuf(GCRY_CIPHER_TWOFISH, 16,
- "\x63\xe3\x0e\xb9\x11\x6f\x14\xba"
- "\x79\xe4\xa7\x9e\xad\x3c\x02\x0c");
+ "\x1c\xf9\xc7\xfc\x3a\x32\xac\xc7\x5e\x0a\xc2\x5c\x90\xd6\xf6\xf9");
check_ocb_cipher_largebuf(GCRY_CIPHER_TWOFISH, 32,
- "\xf6\xd4\xfe\x4e\x50\x85\x13\x59"
- "\x69\x0e\x4c\x67\x3e\xdd\x47\x90");
+ "\x53\x02\xc8\x0d\x4e\x9a\x44\x9e\x43\xd4\xaa\x06\x30\x93\xcc\x16");
check_ocb_cipher_largebuf(GCRY_CIPHER_SERPENT128, 16,
- "\x3c\xfb\x66\x14\x3c\xc8\x6c\x67"
- "\x26\xb8\x23\xeb\xaf\x43\x98\x69");
+ "\xd3\x64\xac\x40\x48\x88\x77\xe2\x41\x26\x4c\xde\x21\x29\x21\x8d");
check_ocb_cipher_largebuf(GCRY_CIPHER_SERPENT192, 24,
- "\x5e\x62\x27\xc5\x32\xc3\x1d\xe6"
- "\x2e\x65\xe7\xd6\xfb\x05\xd7\xb2");
+ "\x99\xeb\x35\xb0\x62\x4e\x7b\xf1\x5e\x9f\xed\x32\x78\x90\x0b\xd0");
check_ocb_cipher_largebuf(GCRY_CIPHER_SERPENT256, 32,
- "\xe7\x8b\xe6\xd4\x2f\x7a\x36\x4c"
- "\xba\xee\x20\xe2\x68\xf4\xcb\xcc");
+ "\x71\x66\x2f\x68\xbf\xdd\xcc\xb1\xbf\x81\x56\x5f\x01\x73\xeb\x44");
/* Check that the AAD data is correctly buffered. */
check_ocb_cipher_splitaad ();