[PATCH 02/10] Enable AMD64 Blowfish implementation on WIN64
Jussi Kivilinna
jussi.kivilinna at iki.fi
Thu May 14 13:11:08 CEST 2015
* cipher/blowfish-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/blowfish.c (USE_AMD64_ASM): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (call_sysv_fn): New.
(do_encrypt, do_encrypt_block, do_decrypt_block)
[HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Call assembly
function through 'call_sysv_fn'.
(blowfish_amd64_ctr_enc, blowfish_amd64_cbc_dec)
(blowfish_amd64_cfb_dec): New wrapper functions for bulk
assembly functions.
..
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/blowfish-amd64.S | 46 +++++++++++++++++------------
cipher/blowfish.c | 74 ++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 97 insertions(+), 23 deletions(-)
diff --git a/cipher/blowfish-amd64.S b/cipher/blowfish-amd64.S
index 87b676f..21b63fc 100644
--- a/cipher/blowfish-amd64.S
+++ b/cipher/blowfish-amd64.S
@@ -20,7 +20,15 @@
#ifdef __x86_64
#include <config.h>
-#if defined(USE_BLOWFISH) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(USE_BLOWFISH) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
.text
@@ -120,7 +128,7 @@
movq RX0, (RIO);
.align 8
-.type __blowfish_enc_blk1,@function;
+ELF(.type __blowfish_enc_blk1,@function;)
__blowfish_enc_blk1:
/* input:
@@ -145,11 +153,11 @@ __blowfish_enc_blk1:
movq %r11, %rbp;
ret;
-.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;
+ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;)
.align 8
.globl _gcry_blowfish_amd64_do_encrypt
-.type _gcry_blowfish_amd64_do_encrypt,@function;
+ELF(.type _gcry_blowfish_amd64_do_encrypt,@function;)
_gcry_blowfish_amd64_do_encrypt:
/* input:
@@ -171,11 +179,11 @@ _gcry_blowfish_amd64_do_encrypt:
movl RX0d, (RX2);
ret;
-.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;
+ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;)
.align 8
.globl _gcry_blowfish_amd64_encrypt_block
-.type _gcry_blowfish_amd64_encrypt_block,@function;
+ELF(.type _gcry_blowfish_amd64_encrypt_block,@function;)
_gcry_blowfish_amd64_encrypt_block:
/* input:
@@ -195,11 +203,11 @@ _gcry_blowfish_amd64_encrypt_block:
write_block();
ret;
-.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;
+ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;)
.align 8
.globl _gcry_blowfish_amd64_decrypt_block
-.type _gcry_blowfish_amd64_decrypt_block,@function;
+ELF(.type _gcry_blowfish_amd64_decrypt_block,@function;)
_gcry_blowfish_amd64_decrypt_block:
/* input:
@@ -231,7 +239,7 @@ _gcry_blowfish_amd64_decrypt_block:
movq %r11, %rbp;
ret;
-.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;
+ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;)
/**********************************************************************
4-way blowfish, four blocks parallel
@@ -319,7 +327,7 @@ _gcry_blowfish_amd64_decrypt_block:
bswapq RX3;
.align 8
-.type __blowfish_enc_blk4,@function;
+ELF(.type __blowfish_enc_blk4,@function;)
__blowfish_enc_blk4:
/* input:
@@ -343,10 +351,10 @@ __blowfish_enc_blk4:
outbswap_block4();
ret;
-.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;
+ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;)
.align 8
-.type __blowfish_dec_blk4,@function;
+ELF(.type __blowfish_dec_blk4,@function;)
__blowfish_dec_blk4:
/* input:
@@ -372,11 +380,11 @@ __blowfish_dec_blk4:
outbswap_block4();
ret;
-.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;
+ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;)
.align 8
.globl _gcry_blowfish_amd64_ctr_enc
-.type _gcry_blowfish_amd64_ctr_enc,@function;
+ELF(.type _gcry_blowfish_amd64_ctr_enc,@function;)
_gcry_blowfish_amd64_ctr_enc:
/* input:
* %rdi: ctx, CTX
@@ -429,11 +437,11 @@ _gcry_blowfish_amd64_ctr_enc:
popq %rbp;
ret;
-.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;
+ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;)
.align 8
.globl _gcry_blowfish_amd64_cbc_dec
-.type _gcry_blowfish_amd64_cbc_dec,@function;
+ELF(.type _gcry_blowfish_amd64_cbc_dec,@function;)
_gcry_blowfish_amd64_cbc_dec:
/* input:
* %rdi: ctx, CTX
@@ -477,11 +485,11 @@ _gcry_blowfish_amd64_cbc_dec:
popq %rbp;
ret;
-.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;
+ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)
.align 8
.globl _gcry_blowfish_amd64_cfb_dec
-.type _gcry_blowfish_amd64_cfb_dec,@function;
+ELF(.type _gcry_blowfish_amd64_cfb_dec,@function;)
_gcry_blowfish_amd64_cfb_dec:
/* input:
* %rdi: ctx, CTX
@@ -527,7 +535,7 @@ _gcry_blowfish_amd64_cfb_dec:
popq %rbx;
popq %rbp;
ret;
-.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;
+ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;)
#endif /*defined(USE_BLOWFISH)*/
#endif /*__x86_64*/
diff --git a/cipher/blowfish.c b/cipher/blowfish.c
index ae470d8..a3fc26c 100644
--- a/cipher/blowfish.c
+++ b/cipher/blowfish.c
@@ -45,7 +45,8 @@
/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
#undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
(BLOWFISH_ROUNDS == 16)
# define USE_AMD64_ASM 1
#endif
@@ -280,22 +281,87 @@ extern void _gcry_blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out,
extern void _gcry_blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out,
const byte *in, byte *iv);
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+static inline void
+call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
+ const void *arg3, const void *arg4)
+{
+ /* Call SystemV ABI function without storing non-volatile XMM registers,
+ * as target function does not use vector instruction sets. */
+ asm volatile ("callq *%0\n\t"
+ : "+a" (fn),
+ "+D" (arg1),
+ "+S" (arg2),
+ "+d" (arg3),
+ "+c" (arg4)
+ :
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+}
+#endif
+
static void
do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_blowfish_amd64_do_encrypt, bc, ret_xl, ret_xr, NULL);
+#else
_gcry_blowfish_amd64_do_encrypt (bc, ret_xl, ret_xr);
+#endif
}
static void
do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_blowfish_amd64_encrypt_block, context, outbuf, inbuf,
+ NULL);
+#else
_gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf);
+#endif
}
static void
do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_blowfish_amd64_decrypt_block, context, outbuf, inbuf,
+ NULL);
+#else
_gcry_blowfish_amd64_decrypt_block (context, outbuf, inbuf);
+#endif
+}
+
+static inline void
+blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in,
+ byte *ctr)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_blowfish_amd64_ctr_enc, ctx, out, in, ctr);
+#else
+ _gcry_blowfish_amd64_ctr_enc(ctx, out, in, ctr);
+#endif
+}
+
+static inline void
+blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
+ byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_blowfish_amd64_cbc_dec, ctx, out, in, iv);
+#else
+ _gcry_blowfish_amd64_cbc_dec(ctx, out, in, iv);
+#endif
+}
+
+static inline void
+blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
+ byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_blowfish_amd64_cfb_dec, ctx, out, in, iv);
+#else
+ _gcry_blowfish_amd64_cfb_dec(ctx, out, in, iv);
+#endif
}
static unsigned int
@@ -605,7 +671,7 @@ _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
/* Process data in 4 block chunks. */
while (nblocks >= 4)
{
- _gcry_blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+ blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
nblocks -= 4;
outbuf += 4 * BLOWFISH_BLOCKSIZE;
@@ -674,7 +740,7 @@ _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Process data in 4 block chunks. */
while (nblocks >= 4)
{
- _gcry_blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+ blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
nblocks -= 4;
outbuf += 4 * BLOWFISH_BLOCKSIZE;
@@ -734,7 +800,7 @@ _gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Process data in 4 block chunks. */
while (nblocks >= 4)
{
- _gcry_blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+ blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
nblocks -= 4;
outbuf += 4 * BLOWFISH_BLOCKSIZE;
More information about the Gcrypt-devel
mailing list