[PATCH 1/3] Use 'vmov' and 'movi' for vector register clearing in ARM assembly
Jussi Kivilinna
jussi.kivilinna at iki.fi
Sat Jan 8 12:06:10 CET 2022
* cipher/chacha20-aarch64.S (clear): Use 'movi'.
* cipher/chacha20-armv7-neon.S (clear): Use 'vmov'.
* cipher/cipher-gcm-armv7-neon.S (CLEAR_REG): Use 'vmov'.
* cipher/cipher-gcm-armv8-aarch32-ce.S (CLEAR_REG): Use 'vmov'.
* cipher/cipher-gcm-armv8-aarch64-ce.S (CLEAR_REG): Use 'movi'.
* cipher/rijndael-armv8-aarch32-ce.S (CLEAR_REG): Use 'vmov'.
* cipher/sha1-armv7-neon.S (CLEAR_REG): Use 'vmov'.
* cipher/sha1-armv8-aarch32-ce.S (CLEAR_REG): Use 'vmov'.
* cipher/sha1-armv8-aarch64-ce.S (CLEAR_REG): Use 'movi'.
* cipher/sha256-armv8-aarch32-ce.S (CLEAR_REG): Use 'vmov'.
* cipher/sha256-armv8-aarch64-ce.S (CLEAR_REG): Use 'movi'.
* cipher/sha512-armv7-neon.S (CLEAR_REG): New using 'vmov'.
(_gcry_sha512_transform_armv7_neon): Use CLEAR_REG for clearing
registers.
--
Use 'vmov.i8 reg, #0' on 32-bit ARM and 'movi reg.16b, #0' on 64-bit ARM
instead of self-XORing the register, to break the false register dependency.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/chacha20-aarch64.S | 2 +-
cipher/chacha20-armv7-neon.S | 2 +-
cipher/cipher-gcm-armv7-neon.S | 2 +-
cipher/cipher-gcm-armv8-aarch32-ce.S | 2 +-
cipher/cipher-gcm-armv8-aarch64-ce.S | 2 +-
cipher/rijndael-armv8-aarch32-ce.S | 2 +-
cipher/sha1-armv7-neon.S | 2 +-
cipher/sha1-armv8-aarch32-ce.S | 2 +-
cipher/sha1-armv8-aarch64-ce.S | 2 +-
cipher/sha256-armv8-aarch32-ce.S | 2 +-
cipher/sha256-armv8-aarch64-ce.S | 2 +-
cipher/sha512-armv7-neon.S | 26 ++++++++++++++------------
12 files changed, 25 insertions(+), 23 deletions(-)
diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S
index b8f9724a..4f76834b 100644
--- a/cipher/chacha20-aarch64.S
+++ b/cipher/chacha20-aarch64.S
@@ -110,7 +110,7 @@
vpunpcklqdq(x2, t2, x2);
#define clear(x) \
- eor x.16b, x.16b, x.16b;
+ movi x.16b, #0;
/**********************************************************************
4-way chacha20
diff --git a/cipher/chacha20-armv7-neon.S b/cipher/chacha20-armv7-neon.S
index 33a43df1..a862be4e 100644
--- a/cipher/chacha20-armv7-neon.S
+++ b/cipher/chacha20-armv7-neon.S
@@ -132,7 +132,7 @@
vswp _q0##h, _q2##l; \
vswp _q1##h, _q3##l;
-#define clear(x) veor x,x,x;
+#define clear(x) vmov.i8 x, #0;
/**********************************************************************
4-way chacha20
diff --git a/cipher/cipher-gcm-armv7-neon.S b/cipher/cipher-gcm-armv7-neon.S
index a801a5e5..16502b4a 100644
--- a/cipher/cipher-gcm-armv7-neon.S
+++ b/cipher/cipher-gcm-armv7-neon.S
@@ -210,7 +210,7 @@ gcry_gcm_reduction_constant:
/* Other functional macros */
-#define CLEAR_REG(reg) veor reg, reg;
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
/*
diff --git a/cipher/cipher-gcm-armv8-aarch32-ce.S b/cipher/cipher-gcm-armv8-aarch32-ce.S
index 1de66a16..fb51b339 100644
--- a/cipher/cipher-gcm-armv8-aarch32-ce.S
+++ b/cipher/cipher-gcm-armv8-aarch32-ce.S
@@ -180,7 +180,7 @@ gcry_gcm_reduction_constant:
/* Other functional macros */
-#define CLEAR_REG(reg) veor reg, reg;
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
/*
diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S
index 877207d3..13ee83ed 100644
--- a/cipher/cipher-gcm-armv8-aarch64-ce.S
+++ b/cipher/cipher-gcm-armv8-aarch64-ce.S
@@ -149,7 +149,7 @@ gcry_gcm_reduction_constant:
#define _(...) __VA_ARGS__
#define __ _()
-#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b;
+#define CLEAR_REG(reg) movi reg.16b, #0;
#define VPUSH_ABI \
stp d8, d9, [sp, #-16]!; \
diff --git a/cipher/rijndael-armv8-aarch32-ce.S b/cipher/rijndael-armv8-aarch32-ce.S
index 6d78af0a..1eafa93e 100644
--- a/cipher/rijndael-armv8-aarch32-ce.S
+++ b/cipher/rijndael-armv8-aarch32-ce.S
@@ -249,7 +249,7 @@
/* Other functional macros */
-#define CLEAR_REG(reg) veor reg, reg;
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
/*
diff --git a/cipher/sha1-armv7-neon.S b/cipher/sha1-armv7-neon.S
index 61cc541c..2de678b8 100644
--- a/cipher/sha1-armv7-neon.S
+++ b/cipher/sha1-armv7-neon.S
@@ -303,7 +303,7 @@ gcry_sha1_armv7_neon_K_VEC:
/* Other functional macros */
-#define CLEAR_REG(reg) veor reg, reg;
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
/*
diff --git a/cipher/sha1-armv8-aarch32-ce.S b/cipher/sha1-armv8-aarch32-ce.S
index bf2b233b..059b9a85 100644
--- a/cipher/sha1-armv8-aarch32-ce.S
+++ b/cipher/sha1-armv8-aarch32-ce.S
@@ -100,7 +100,7 @@ gcry_sha1_aarch32_ce_K_VEC:
/* Other functional macros */
-#define CLEAR_REG(reg) veor reg, reg;
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
/*
diff --git a/cipher/sha1-armv8-aarch64-ce.S b/cipher/sha1-armv8-aarch64-ce.S
index 223268ca..8ea1486b 100644
--- a/cipher/sha1-armv8-aarch64-ce.S
+++ b/cipher/sha1-armv8-aarch64-ce.S
@@ -88,7 +88,7 @@ gcry_sha1_aarch64_ce_K_VEC:
/* Other functional macros */
-#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b;
+#define CLEAR_REG(reg) movi reg.16b, #0;
/*
diff --git a/cipher/sha256-armv8-aarch32-ce.S b/cipher/sha256-armv8-aarch32-ce.S
index 2b17ab1b..95778b40 100644
--- a/cipher/sha256-armv8-aarch32-ce.S
+++ b/cipher/sha256-armv8-aarch32-ce.S
@@ -111,7 +111,7 @@ gcry_sha256_aarch32_ce_K:
/* Other functional macros */
-#define CLEAR_REG(reg) veor reg, reg;
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
/*
diff --git a/cipher/sha256-armv8-aarch64-ce.S b/cipher/sha256-armv8-aarch64-ce.S
index f57cae29..5c39e83e 100644
--- a/cipher/sha256-armv8-aarch64-ce.S
+++ b/cipher/sha256-armv8-aarch64-ce.S
@@ -98,7 +98,7 @@ gcry_sha256_aarch64_ce_K:
/* Other functional macros */
-#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b;
+#define CLEAR_REG(reg) movi reg.16b, #0;
/*
diff --git a/cipher/sha512-armv7-neon.S b/cipher/sha512-armv7-neon.S
index 6596f2cd..2b186b47 100644
--- a/cipher/sha512-armv7-neon.S
+++ b/cipher/sha512-armv7-neon.S
@@ -91,6 +91,8 @@
#define RW1213q q14
#define RW1415q q15
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
+
/***********************************************************************
* ARM assembly implementation of sha512 transform
***********************************************************************/
@@ -426,22 +428,22 @@ _gcry_sha512_transform_armv7_neon:
/* Clear used registers */
/* d16-d31 */
- veor.u64 RW01q, RW01q;
- veor.u64 RW23q, RW23q;
- veor.u64 RW45q, RW45q;
- veor.u64 RW67q, RW67q;
+ CLEAR_REG(RW01q);
+ CLEAR_REG(RW23q);
+ CLEAR_REG(RW45q);
+ CLEAR_REG(RW67q);
vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
- veor.u64 RW89q, RW89q;
- veor.u64 RW1011q, RW1011q;
- veor.u64 RW1213q, RW1213q;
- veor.u64 RW1415q, RW1415q;
+ CLEAR_REG(RW89q);
+ CLEAR_REG(RW1011q);
+ CLEAR_REG(RW1213q);
+ CLEAR_REG(RW1415q);
/* d8-d15 */
vpop {RT0-RT7};
/* d0-d7 (q0-q3) */
- veor.u64 %q0, %q0;
- veor.u64 %q1, %q1;
- veor.u64 %q2, %q2;
- veor.u64 %q3, %q3;
+ CLEAR_REG(%q0);
+ CLEAR_REG(%q1);
+ CLEAR_REG(%q2);
+ CLEAR_REG(%q3);
eor %r0, %r0;
pop {%pc};
--
2.32.0
More information about the Gcrypt-devel
mailing list