!148 修复aes-128-ctr算法在aarch64平台上存在性能下降的问题
From: @fangxiuning Reviewed-by: @zhujianwei001 Signed-off-by: @zhujianwei001
This commit is contained in:
commit
3b0e96c594
146
Fix-reported-performance-degradation-on-aarch64.patch
Normal file
146
Fix-reported-performance-degradation-on-aarch64.patch
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
From a8f6d73fda64d514171e99a50d1483c0c0b8d968 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Bernd Edlinger <bernd.edlinger@hotmail.de>
|
||||||
|
Date: Sun, 12 Jun 2022 09:37:26 +0200
|
||||||
|
Subject: [PATCH] Fix reported performance degradation on aarch64
|
||||||
|
|
||||||
|
This restores the implementation prior to
|
||||||
|
commit 2621751 ("aes/asm/aesv8-armx.pl: avoid 32-bit lane assignment in CTR mode")
|
||||||
|
for 64bit targets only, since it is reportedly 2-17% slower,
|
||||||
|
and the silicon errata only affects 32bit targets.
|
||||||
|
Only for 32bit targets the new algorithm is used.
|
||||||
|
|
||||||
|
Fixes #18445
|
||||||
|
|
||||||
|
Reviewed-by: Tomas Mraz <tomas@openssl.org>
|
||||||
|
Reviewed-by: Paul Dale <pauli@openssl.org>
|
||||||
|
Reviewed-by: Hugo Landau <hlandau@openssl.org>
|
||||||
|
(Merged from https://github.com/openssl/openssl/pull/18539)
|
||||||
|
---
|
||||||
|
crypto/aes/asm/aesv8-armx.pl | 62 ++++++++++++++++++++++++++++++++++++++++++++
|
||||||
|
1 file changed, 62 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
|
||||||
|
index 2b0e982..1856d99 100755
|
||||||
|
--- a/crypto/aes/asm/aesv8-armx.pl
|
||||||
|
+++ b/crypto/aes/asm/aesv8-armx.pl
|
||||||
|
@@ -740,6 +740,21 @@ $code.=<<___;
|
||||||
|
#ifndef __ARMEB__
|
||||||
|
rev $ctr, $ctr
|
||||||
|
#endif
|
||||||
|
+___
|
||||||
|
+$code.=<<___ if ($flavour =~ /64/);
|
||||||
|
+ vorr $dat1,$dat0,$dat0
|
||||||
|
+ add $tctr1, $ctr, #1
|
||||||
|
+ vorr $dat2,$dat0,$dat0
|
||||||
|
+ add $ctr, $ctr, #2
|
||||||
|
+ vorr $ivec,$dat0,$dat0
|
||||||
|
+ rev $tctr1, $tctr1
|
||||||
|
+ vmov.32 ${dat1}[3],$tctr1
|
||||||
|
+ b.ls .Lctr32_tail
|
||||||
|
+ rev $tctr2, $ctr
|
||||||
|
+ sub $len,$len,#3 // bias
|
||||||
|
+ vmov.32 ${dat2}[3],$tctr2
|
||||||
|
+___
|
||||||
|
+$code.=<<___ if ($flavour !~ /64/);
|
||||||
|
add $tctr1, $ctr, #1
|
||||||
|
vorr $ivec,$dat0,$dat0
|
||||||
|
rev $tctr1, $tctr1
|
||||||
|
@@ -751,6 +766,8 @@ $code.=<<___;
|
||||||
|
vmov.32 ${ivec}[3],$tctr2
|
||||||
|
sub $len,$len,#3 // bias
|
||||||
|
vorr $dat2,$ivec,$ivec
|
||||||
|
+___
|
||||||
|
+$code.=<<___;
|
||||||
|
b .Loop3x_ctr32
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
@@ -777,11 +794,25 @@ $code.=<<___;
|
||||||
|
aese $dat1,q8
|
||||||
|
aesmc $tmp1,$dat1
|
||||||
|
vld1.8 {$in0},[$inp],#16
|
||||||
|
+___
|
||||||
|
+$code.=<<___ if ($flavour =~ /64/);
|
||||||
|
+ vorr $dat0,$ivec,$ivec
|
||||||
|
+___
|
||||||
|
+$code.=<<___ if ($flavour !~ /64/);
|
||||||
|
add $tctr0,$ctr,#1
|
||||||
|
+___
|
||||||
|
+$code.=<<___;
|
||||||
|
aese $dat2,q8
|
||||||
|
aesmc $dat2,$dat2
|
||||||
|
vld1.8 {$in1},[$inp],#16
|
||||||
|
+___
|
||||||
|
+$code.=<<___ if ($flavour =~ /64/);
|
||||||
|
+ vorr $dat1,$ivec,$ivec
|
||||||
|
+___
|
||||||
|
+$code.=<<___ if ($flavour !~ /64/);
|
||||||
|
rev $tctr0,$tctr0
|
||||||
|
+___
|
||||||
|
+$code.=<<___;
|
||||||
|
aese $tmp0,q9
|
||||||
|
aesmc $tmp0,$tmp0
|
||||||
|
aese $tmp1,q9
|
||||||
|
@@ -790,6 +821,12 @@ $code.=<<___;
|
||||||
|
mov $key_,$key
|
||||||
|
aese $dat2,q9
|
||||||
|
aesmc $tmp2,$dat2
|
||||||
|
+___
|
||||||
|
+$code.=<<___ if ($flavour =~ /64/);
|
||||||
|
+ vorr $dat2,$ivec,$ivec
|
||||||
|
+ add $tctr0,$ctr,#1
|
||||||
|
+___
|
||||||
|
+$code.=<<___;
|
||||||
|
aese $tmp0,q12
|
||||||
|
aesmc $tmp0,$tmp0
|
||||||
|
aese $tmp1,q12
|
||||||
|
@@ -805,22 +842,47 @@ $code.=<<___;
|
||||||
|
aese $tmp1,q13
|
||||||
|
aesmc $tmp1,$tmp1
|
||||||
|
veor $in2,$in2,$rndlast
|
||||||
|
+___
|
||||||
|
+$code.=<<___ if ($flavour =~ /64/);
|
||||||
|
+ rev $tctr0,$tctr0
|
||||||
|
+ aese $tmp2,q13
|
||||||
|
+ aesmc $tmp2,$tmp2
|
||||||
|
+ vmov.32 ${dat0}[3], $tctr0
|
||||||
|
+___
|
||||||
|
+$code.=<<___ if ($flavour !~ /64/);
|
||||||
|
vmov.32 ${ivec}[3], $tctr0
|
||||||
|
aese $tmp2,q13
|
||||||
|
aesmc $tmp2,$tmp2
|
||||||
|
vorr $dat0,$ivec,$ivec
|
||||||
|
+___
|
||||||
|
+$code.=<<___;
|
||||||
|
rev $tctr1,$tctr1
|
||||||
|
aese $tmp0,q14
|
||||||
|
aesmc $tmp0,$tmp0
|
||||||
|
+___
|
||||||
|
+$code.=<<___ if ($flavour !~ /64/);
|
||||||
|
vmov.32 ${ivec}[3], $tctr1
|
||||||
|
rev $tctr2,$ctr
|
||||||
|
+___
|
||||||
|
+$code.=<<___;
|
||||||
|
aese $tmp1,q14
|
||||||
|
aesmc $tmp1,$tmp1
|
||||||
|
+___
|
||||||
|
+$code.=<<___ if ($flavour =~ /64/);
|
||||||
|
+ vmov.32 ${dat1}[3], $tctr1
|
||||||
|
+ rev $tctr2,$ctr
|
||||||
|
+ aese $tmp2,q14
|
||||||
|
+ aesmc $tmp2,$tmp2
|
||||||
|
+ vmov.32 ${dat2}[3], $tctr2
|
||||||
|
+___
|
||||||
|
+$code.=<<___ if ($flavour !~ /64/);
|
||||||
|
vorr $dat1,$ivec,$ivec
|
||||||
|
vmov.32 ${ivec}[3], $tctr2
|
||||||
|
aese $tmp2,q14
|
||||||
|
aesmc $tmp2,$tmp2
|
||||||
|
vorr $dat2,$ivec,$ivec
|
||||||
|
+___
|
||||||
|
+$code.=<<___;
|
||||||
|
subs $len,$len,#3
|
||||||
|
aese $tmp0,q15
|
||||||
|
aese $tmp1,q15
|
||||||
|
--
|
||||||
|
1.8.3.1
|
||||||
|
|
||||||
@ -2,7 +2,7 @@
|
|||||||
Name: openssl
|
Name: openssl
|
||||||
Epoch: 1
|
Epoch: 1
|
||||||
Version: 1.1.1m
|
Version: 1.1.1m
|
||||||
Release: 8
|
Release: 9
|
||||||
Summary: Cryptography and SSL/TLS Toolkit
|
Summary: Cryptography and SSL/TLS Toolkit
|
||||||
License: OpenSSL and SSLeay
|
License: OpenSSL and SSLeay
|
||||||
URL: https://www.openssl.org/
|
URL: https://www.openssl.org/
|
||||||
@ -33,6 +33,7 @@ Patch22: Feature-X509-command-supports-SM2-certificate-signing-with-default-
|
|||||||
Patch23: CVE-2022-2068-Fix-file-operations-in-c_rehash.patch
|
Patch23: CVE-2022-2068-Fix-file-operations-in-c_rehash.patch
|
||||||
Patch24: CVE-2022-2097-Fix-AES-OCB-encrypt-decrypt-for-x86-AES-NI.patch
|
Patch24: CVE-2022-2097-Fix-AES-OCB-encrypt-decrypt-for-x86-AES-NI.patch
|
||||||
Patch25: Feature-add-ARMv8-implementations-of-SM4-in-ECB-and-XTS.patch
|
Patch25: Feature-add-ARMv8-implementations-of-SM4-in-ECB-and-XTS.patch
|
||||||
|
Patch26: Fix-reported-performance-degradation-on-aarch64.patch
|
||||||
|
|
||||||
BuildRequires: gcc perl make lksctp-tools-devel coreutils util-linux zlib-devel
|
BuildRequires: gcc perl make lksctp-tools-devel coreutils util-linux zlib-devel
|
||||||
Requires: coreutils %{name}-libs%{?_isa} = %{epoch}:%{version}-%{release}
|
Requires: coreutils %{name}-libs%{?_isa} = %{epoch}:%{version}-%{release}
|
||||||
@ -235,6 +236,9 @@ make test || :
|
|||||||
%ldconfig_scriptlets libs
|
%ldconfig_scriptlets libs
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Oct 20 2022 fangxiuning <fangxiuning@huawei.com> - 1:1.1.1m-9
|
||||||
|
- fix performance degradation on aarch64
|
||||||
|
|
||||||
* Tue Sep 13 2022 Xu Yizhou <xuyizhou1@huawei.com> - 1:1.1.1m-8
|
* Tue Sep 13 2022 Xu Yizhou <xuyizhou1@huawei.com> - 1:1.1.1m-8
|
||||||
- add ARMv8 implementations of SM4 in ECB and XTS
|
- add ARMv8 implementations of SM4 in ECB and XTS
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user