[PATCH] Enable VIA PadLock on x86_64 platforms
Rafaël Carré
funman at videolan.org
Wed Apr 11 06:20:14 CEST 2012
Convert the existing 32-bit asm to 64-bit:
- *l instructions -> *q instructions (long -> quad)
- e** registers -> r** registers
- no need to save/restore ebx, the GOT register on i386 PIC (see the
  sketch below)
Tested with make check on a VIA Nano X2 L4350.
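The core of the conversion, side by side (a minimal sketch in the
spirit of the rijndael.c hunk below; the function and variable names
are placeholders, not the literal patch code):

    /* a/b are 16-byte blocks; cword points to the 16-byte aligned
       PadLock control word, key to the aligned key schedule.  */
    static void
    xcrypt_ecb_one_block (void *a, void *b, const void *cword,
                          const void *key)
    {
    #ifdef __x86_64__
      /* x86_64: rbx is not a PIC register, so the key pointer can go
         into ebx directly, and the flags are pushed/popped as quads.  */
      asm volatile ("pushfq\n\t"
                    "popfq\n\t"                        /* Force key reload.  */
                    ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB.  */
                    : /* No output.  */
                    : "S" (a), "D" (b), "d" (cword), "b" (key), "c" (1)
                    : "cc", "memory");
    #else
      /* i386: ebx holds the GOT pointer under PIC, so the key pointer
         is swapped into ebx only around the XCRYPT instruction.  */
      asm volatile ("pushfl\n\t"
                    "popfl\n\t"                        /* Force key reload.  */
                    "xchg %3, %%ebx\n\t"               /* Load key, save GOT.  */
                    ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB.  */
                    "xchg %3, %%ebx\n"                 /* Restore GOT register.  */
                    : /* No output.  */
                    : "S" (a), "D" (b), "d" (cword), "r" (key), "c" (1)
                    : "cc", "memory");
    #endif
    }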
---
Note: make check actually fails, but this is not a regression from the
32-bit build, where the same test fails with exactly the same output.
% ./tests/basic
aes-ctr, encrypt mismatch entry 0:0
expected: 87 4d 61 91 b6 20 e3 26 1b ef 68 64 99 0d b6 ce
computed: 68 b1 c5 2b a6 00 3d e7 be 60 08 ad 1b f8 7c c0
aes-ctr, encrypt mismatch entry 0:1
expected: 98 06 f6 6b 79 70 fd ff 86 17 18 7b b9 ff fd ff
computed: 2b 2d 2c db 8c 5b d3 ee d1 57 79 37 9d 09 10 ed
aes-ctr, encrypt mismatch entry 0:2
expected: 5a e4 df 3e db d5 d3 5e 5b 4f 09 02 0d b0 3e ab
computed: 39 1e cc a8 f9 d0 62 bf 4c 60 4b 82 1d 8e 32 90
aes-ctr, encrypt mismatch entry 0:3
expected: 1e 03 1d da 2f be 03 d1 79 21 70 a0 f3 00 9c ee
computed: fb 7d c4 ce 68 9e 5d dc 74 32 1a 71 35 8a cf 49
aes-ctr, encrypt mismatch entry 3:0
expected: 87 4d 61 91 b6 20 e3 26 1b ef 68 64 99 0d b6 ce
computed: 68 b1 c5 2b a6 00 3d e7 be 60 08 ad 1b f8 7c c0
aes-ctr, encrypt mismatch entry 4:0
expected: 87 4d 61 91 b6 20 e3 26 1b ef 68 64 99 0d b6 ce
computed: 68 b1 c5 2b a6 00 3d e7 be 60 08 ad 1b f8 7c c0
aes-ctr, encrypt mismatch entry 7:0
expected: 87 4d 61 91 b6 20 e3 26 1b ef 68 64 99 0d b6 ce
computed: 68 b1 c5 2b a6 00 3d e7 be 60 08 ad 1b f8 7c c0
aes-ctr, encrypt mismatch entry 7:2
expected: 5a e4 df 3e db d5 d3 5e 5b 4f 09 02 0d b0 3e ab
computed: 39 1e cc a8 f9 d0 62 bf 4c 60 4b 82 1d 8e 32 90
aes-ctr, encrypt mismatch entry 7:3
expected: 1e 03 1d da 2f be 03 d1 79 21 70 a0 f3 00 9c ee
computed: fb 7d c4 ce 68 9e 5d dc 74 32 1a 71 35 8a cf 49
encrypt mismatch (algo 7, mode 6)
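For eyeballing what the new hwfeatures.c detection reports, a dump
along these lines can be dropped into the library (a sketch only:
_gcry_get_hw_features() is internal, the HWF_* constants live in
src/g10lib.h, and the numeric values below match the orl immediates
in the asm further down):

    #include <stdio.h>
    #include "g10lib.h"   /* Internal header; build inside libgcrypt.  */

    static void
    dump_hw_features (void)
    {
      /* _gcry_detect_hw_features() must already have run; it is
         called during the library's global initialization.  */
      unsigned int hwf = _gcry_get_hw_features ();

      printf ("padlock-rng:  %s\n", (hwf & 1)   ? "yes" : "no");
      printf ("padlock-aes:  %s\n", (hwf & 2)   ? "yes" : "no");
      printf ("padlock-sha:  %s\n", (hwf & 4)   ? "yes" : "no");
      printf ("padlock-mmul: %s\n", (hwf & 8)   ? "yes" : "no");
      printf ("intel-aes:    %s\n", (hwf & 256) ? "yes" : "no");
    }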
cipher/rijndael.c | 21 +++++++++---
random/rndhw.c | 13 ++++++-
src/hwfeatures.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 125 insertions(+), 6 deletions(-)
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 2df8ea9..c2e0a77 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -63,7 +63,7 @@
code. */
#undef USE_PADLOCK
#ifdef ENABLE_PADLOCK_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+# if ( ( defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 ) || defined(__x86_64__) ) && defined (__GNUC__)
# define USE_PADLOCK 1
# endif
#endif /*ENABLE_PADLOCK_SUPPORT*/
@@ -663,17 +663,28 @@ do_padlock (const RIJNDAEL_context *ctx, int decrypt_flag,
memcpy (a, ax, 16);
+ int blocks = 1; /* Init counter for just one block. */
+#ifdef __x86_64__
+ asm volatile
+ ("pushfq\n\t" /* Force key reload. */
+ "popfq\n\t"
+ ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
+ : /* No output */
+ : "S" (a), "D" (b), "d" (cword), "b" (ctx->padlockkey), "c" (blocks)
+ : "cc", "memory"
+ );
+#else
asm volatile
("pushfl\n\t" /* Force key reload. */
"popfl\n\t"
"xchg %3, %%ebx\n\t" /* Load key. */
- "movl $1, %%ecx\n\t" /* Init counter for just one block. */
- ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XSTORE ECB. */
+ ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
"xchg %3, %%ebx\n" /* Restore GOT register. */
: /* No output */
- : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey)
- : "%ecx", "cc", "memory"
+ : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey), "c" (blocks)
+ : "cc", "memory"
);
+#endif
memcpy (bx, b, 16);
diff --git a/random/rndhw.c b/random/rndhw.c
index 82faab4..c933cf9 100644
--- a/random/rndhw.c
+++ b/random/rndhw.c
@@ -27,7 +27,7 @@
#undef USE_PADLOCK
#ifdef ENABLE_PADLOCK_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+# if ( (defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__) ) && defined (__GNUC__)
# define USE_PADLOCK
# endif
#endif /*ENABLE_PADLOCK_SUPPORT*/
@@ -55,6 +55,16 @@ poll_padlock (void (*add)(const void*, size_t, enum random_origins),
nbytes = 0;
while (nbytes < 64)
{
+#ifdef __x86_64__
+ asm volatile
+ ("movq %1, %%rdi\n\t" /* Set buffer. */
+ "xorq %%rdx, %%rdx\n\t" /* Request up to 8 bytes. */
+ ".byte 0x0f, 0xa7, 0xc0\n\t" /* XSTORE RNG. */
+ : "=a" (status)
+ : "g" (p)
+ : "%rdx", "%rdi", "cc"
+ );
+#else
asm volatile
("movl %1, %%edi\n\t" /* Set buffer. */
"xorl %%edx, %%edx\n\t" /* Request up to 8 bytes. */
@@ -63,6 +73,7 @@ poll_padlock (void (*add)(const void*, size_t, enum random_origins),
: "g" (p)
: "%edx", "%edi", "cc"
);
+#endif
if ((status & (1<<6)) /* RNG still enabled. */
&& !(status & (1<<13)) /* von Neumann corrector is enabled. */
&& !(status & (1<<14)) /* String filter is disabled. */
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index c356798..73db917 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -40,6 +40,99 @@ _gcry_get_hw_features (void)
}
+#if defined (__x86_64__) && defined (__GNUC__)
+static void
+detect_x86_64_gnuc (void)
+{
+ /* The code here is only useful for the PadLock engine thus we don't
+ build it if that support has been disabled. */
+ char vendor_id[12+1];
+
+ asm volatile
+ ("xorl %%eax, %%eax\n\t" /* 0 -> EAX. */
+ "cpuid\n\t" /* Get vendor ID. */
+ "movl %%ebx, (%0)\n\t" /* EBX,EDX,ECX -> VENDOR_ID. */
+ "movl %%edx, 4(%0)\n\t"
+ "movl %%ecx, 8(%0)\n\t"
+ :
+ : "S" (&vendor_id[0])
+ : "%eax", "%ecx", "%edx", "cc"
+ );
+ vendor_id[12] = 0;
+
+ if (0)
+ ; /* Just to make "else if" and ifdef macros look pretty. */
+#ifdef ENABLE_PADLOCK_SUPPORT
+ else if (!strcmp (vendor_id, "CentaurHauls"))
+ {
+ /* This is a VIA CPU. Check what PadLock features we have. */
+ asm volatile
+ ("movl $0xC0000000, %%eax\n\t" /* Check for extended centaur */
+ "cpuid\n\t" /* feature flags. */
+ "cmpl $0xC0000001, %%eax\n\t"
+ "jb .Lready%=\n\t" /* EAX < 0xC0000000 => no padlock. */
+
+ "movl $0xC0000001, %%eax\n\t" /* Ask for the extended */
+ "cpuid\n\t" /* feature flags. */
+
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0x0C, %%eax\n\t" /* Test bits 2 and 3 to see whether */
+ "cmpl $0x0C, %%eax\n\t" /* the RNG exists and is enabled. */
+ "jnz .Lno_rng%=\n\t"
+ "orl $1, %0\n" /* Set our HWF_PADLOCK_RNG bit. */
+
+ ".Lno_rng%=:\n\t"
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0xC0, %%eax\n\t" /* Test bits 6 and 7 to see whether */
+ "cmpl $0xC0, %%eax\n\t" /* the ACE exists and is enabled. */
+ "jnz .Lno_ace%=\n\t"
+ "orl $2, %0\n" /* Set our HWF_PADLOCK_AES bit. */
+
+ ".Lno_ace%=:\n\t"
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0xC00, %%eax\n\t" /* Test bits 10, 11 to see whether */
+ "cmpl $0xC00, %%eax\n\t" /* the PHE exists and is enabled. */
+ "jnz .Lno_phe%=\n\t"
+ "orl $4, %0\n" /* Set our HWF_PADLOCK_SHA bit. */
+
+ ".Lno_phe%=:\n\t"
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0x3000, %%eax\n\t" /* Test bits 12, 13 to see whether */
+ "cmpl $0x3000, %%eax\n\t" /* MONTMUL exists and is enabled. */
+ "jnz .Lready%=\n\t"
+ "orl $8, %0\n" /* Set our HWF_PADLOCK_MMUL bit. */
+
+ ".Lready%=:\n"
+ : "+r" (hw_features)
+ :
+ : "%eax", "%edx", "cc"
+ );
+ }
+#endif /*ENABLE_PADLOCK_SUPPORT*/
+ else if (!strcmp (vendor_id, "GenuineIntel"))
+ {
+ /* This is an Intel CPU. */
+ asm volatile
+ ("movl $1, %%eax\n\t" /* Get CPU info and feature flags. */
+ "cpuid\n"
+ "testl $0x02000000, %%ecx\n\t" /* Test bit 25. */
+ "jz .Lno_aes%=\n\t" /* No AES support. */
+ "orl $256, %0\n" /* Set our HWF_INTEL_AES bit. */
+
+ ".Lno_aes%=:\n"
+ : "+r" (hw_features)
+ :
+ : "%eax", "%ecx", "%edx", "cc"
+ );
+ }
+ else if (!strcmp (vendor_id, "AuthenticAMD"))
+ {
+ /* This is an AMD CPU. */
+
+ }
+}
+#endif /* __x86_64__ && __GNUC__ */
+
#if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
static void
detect_ia32_gnuc (void)
@@ -186,6 +279,10 @@ _gcry_detect_hw_features (unsigned int disabled_features)
#elif defined (__i386__) && SIZEOF_UNSIGNED_LONG == 8
#ifdef __GNUC__
#endif
+#elif defined (__x86_64__)
+#ifdef __GNUC__
+ detect_x86_64_gnuc ();
+#endif
#endif
hw_features &= ~disabled_features;
--
1.7.9.1