!148 修复aes-128-ctr算法在aarch64平台上存在性能下降的问题

From: @fangxiuning Reviewed-by: @zhujianwei001 Signed-off-by: @zhujianwei001
2022-10-20 07:48:27 +00:00 · 2022-10-20 07:48:27 +00:00 · 3b0e96c594
commit 3b0e96c594
parent 3f384550bd 615375473f
2 changed files with 151 additions and 1 deletions
--- a/Fix-reported-performance-degradation-on-aarch64.patch
+++ b/Fix-reported-performance-degradation-on-aarch64.patch
@ -0,0 +1,146 @@
+From a8f6d73fda64d514171e99a50d1483c0c0b8d968 Mon Sep 17 00:00:00 2001
+From: Bernd Edlinger <bernd.edlinger@hotmail.de>
+Date: Sun, 12 Jun 2022 09:37:26 +0200
+Subject: [PATCH] Fix reported performance degradation on aarch64
+
+This restores the implementation prior to
+commit 2621751 ("aes/asm/aesv8-armx.pl: avoid 32-bit lane assignment in CTR mode")
+for 64bit targets only, since it is reportedly 2-17% slower,
+and the silicon errata only affects 32bit targets.
+Only for 32bit targets the new algorithm is used.
+
+Fixes #18445
+
+Reviewed-by: Tomas Mraz <tomas@openssl.org>
+Reviewed-by: Paul Dale <pauli@openssl.org>
+Reviewed-by: Hugo Landau <hlandau@openssl.org>
+(Merged from https://github.com/openssl/openssl/pull/18539)
+---
+ crypto/aes/asm/aesv8-armx.pl | 62 ++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 62 insertions(+)
+
+diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
+index 2b0e982..1856d99 100755
+--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
+@@ -740,6 +740,21 @@ $code.=<<___;
+ #ifndef __ARMEB__
+ 	rev		$ctr, $ctr
+ #endif
+___
+$code.=<<___	if ($flavour =~ /64/);
+	vorr		$dat1,$dat0,$dat0
+	add		$tctr1, $ctr, #1
+	vorr		$dat2,$dat0,$dat0
+	add		$ctr, $ctr, #2
+	vorr		$ivec,$dat0,$dat0
+	rev		$tctr1, $tctr1
+	vmov.32		${dat1}[3],$tctr1
+	b.ls		.Lctr32_tail
+	rev		$tctr2, $ctr
+	sub		$len,$len,#3		// bias
+	vmov.32		${dat2}[3],$tctr2
+___
+$code.=<<___	if ($flavour !~ /64/);
+ 	add		$tctr1, $ctr, #1
+ 	vorr		$ivec,$dat0,$dat0
+ 	rev		$tctr1, $tctr1
+@@ -751,6 +766,8 @@ $code.=<<___;
+ 	vmov.32		${ivec}[3],$tctr2
+ 	sub		$len,$len,#3		// bias
+ 	vorr		$dat2,$ivec,$ivec
+___
+$code.=<<___;
+ 	b		.Loop3x_ctr32
+ 
+ .align	4
+@@ -777,11 +794,25 @@ $code.=<<___;
+ 	aese		$dat1,q8
+ 	aesmc		$tmp1,$dat1
+ 	 vld1.8		{$in0},[$inp],#16
+___
+$code.=<<___	if ($flavour =~ /64/);
+	 vorr		$dat0,$ivec,$ivec
+___
+$code.=<<___	if ($flavour !~ /64/);
+ 	 add		$tctr0,$ctr,#1
+___
+$code.=<<___;
+ 	aese		$dat2,q8
+ 	aesmc		$dat2,$dat2
+ 	 vld1.8		{$in1},[$inp],#16
+___
+$code.=<<___	if ($flavour =~ /64/);
+	 vorr		$dat1,$ivec,$ivec
+___
+$code.=<<___	if ($flavour !~ /64/);
+ 	 rev		$tctr0,$tctr0
+___
+$code.=<<___;
+ 	aese		$tmp0,q9
+ 	aesmc		$tmp0,$tmp0
+ 	aese		$tmp1,q9
+@@ -790,6 +821,12 @@ $code.=<<___;
+ 	 mov		$key_,$key
+ 	aese		$dat2,q9
+ 	aesmc		$tmp2,$dat2
+___
+$code.=<<___	if ($flavour =~ /64/);
+	 vorr		$dat2,$ivec,$ivec
+	 add		$tctr0,$ctr,#1
+___
+$code.=<<___;
+ 	aese		$tmp0,q12
+ 	aesmc		$tmp0,$tmp0
+ 	aese		$tmp1,q12
+@@ -805,22 +842,47 @@ $code.=<<___;
+ 	aese		$tmp1,q13
+ 	aesmc		$tmp1,$tmp1
+ 	 veor		$in2,$in2,$rndlast
+___
+$code.=<<___	if ($flavour =~ /64/);
+	 rev		$tctr0,$tctr0
+	aese		$tmp2,q13
+	aesmc		$tmp2,$tmp2
+	 vmov.32	${dat0}[3], $tctr0
+___
+$code.=<<___	if ($flavour !~ /64/);
+ 	 vmov.32	${ivec}[3], $tctr0
+ 	aese		$tmp2,q13
+ 	aesmc		$tmp2,$tmp2
+ 	 vorr		$dat0,$ivec,$ivec
+___
+$code.=<<___;
+ 	 rev		$tctr1,$tctr1
+ 	aese		$tmp0,q14
+ 	aesmc		$tmp0,$tmp0
+___
+$code.=<<___	if ($flavour !~ /64/);
+ 	 vmov.32	${ivec}[3], $tctr1
+ 	 rev		$tctr2,$ctr
+___
+$code.=<<___;
+ 	aese		$tmp1,q14
+ 	aesmc		$tmp1,$tmp1
+___
+$code.=<<___	if ($flavour =~ /64/);
+	 vmov.32	${dat1}[3], $tctr1
+	 rev		$tctr2,$ctr
+	aese		$tmp2,q14
+	aesmc		$tmp2,$tmp2
+	 vmov.32	${dat2}[3], $tctr2
+___
+$code.=<<___	if ($flavour !~ /64/);
+ 	 vorr		$dat1,$ivec,$ivec
+ 	 vmov.32	${ivec}[3], $tctr2
+ 	aese		$tmp2,q14
+ 	aesmc		$tmp2,$tmp2
+ 	 vorr		$dat2,$ivec,$ivec
+___
+$code.=<<___;
+ 	 subs		$len,$len,#3
+ 	aese		$tmp0,q15
+ 	aese		$tmp1,q15
+-- 
+1.8.3.1
+
--- a/openssl.spec
+++ b/openssl.spec
@ -2,7 +2,7 @@
 Name:        openssl
 Epoch:       1
 Version:     1.1.1m
-Release:     8
+Release:     9
 Summary:     Cryptography and SSL/TLS Toolkit
 License:     OpenSSL and SSLeay
 URL:         https://www.openssl.org/
@ -33,6 +33,7 @@ Patch22:     Feature-X509-command-supports-SM2-certificate-signing-with-default-
 Patch23:     CVE-2022-2068-Fix-file-operations-in-c_rehash.patch
 Patch24:     CVE-2022-2097-Fix-AES-OCB-encrypt-decrypt-for-x86-AES-NI.patch
 Patch25:     Feature-add-ARMv8-implementations-of-SM4-in-ECB-and-XTS.patch
+Patch26:     Fix-reported-performance-degradation-on-aarch64.patch 

 BuildRequires: gcc perl make lksctp-tools-devel coreutils util-linux zlib-devel
 Requires:    coreutils %{name}-libs%{?_isa} = %{epoch}:%{version}-%{release}
@ -235,6 +236,9 @@ make test || :
 %ldconfig_scriptlets libs

 %changelog
+* Thu Oct 20 2022 fangxiuning <fangxiuning@huawei.com> - 1:1.1.1m-9
+- fix performance degradation on aarch64
+
 * Tue Sep 13 2022 Xu Yizhou <xuyizhou1@huawei.com> - 1:1.1.1m-8
 - add ARMv8 implementations of SM4 in ECB and XTS