zlib/zlib-Optimize-CRC32.patch

95 lines
2.6 KiB
Diff
Raw Permalink Normal View History

2019-12-13 16:17:17 +08:00
From 8935175266e343ac1d52106e2e790810b54f26c1 Mon Sep 17 00:00:00 2001
From: liqiang64 <liqiang64@huawei.com>
Date: Tue, 3 Dec 2019 03:22:00 +0000
Subject: [PATCH] zlib: Optimize CRC32
This patch uses the ARMv8 CRC32 instructions to optimize the CRC32
algorithm.
2022-12-30 04:28:25 +08:00
On the ARM architecture, we can optimize the efficiency of
2019-12-13 16:17:17 +08:00
crc32 through the __crc32* intrinsics that arm_acle.h provides for the
ARMv8 CRC32 instructions.
Modified by Li Qiang.
---
crc32.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
2019-12-13 16:17:17 +08:00
diff --git a/crc32.c b/crc32.c
2022-12-30 04:28:25 +08:00
index f8357b0..5c53068 100644
2019-12-13 16:17:17 +08:00
--- a/crc32.c
+++ b/crc32.c
2022-12-30 04:28:25 +08:00
@@ -28,6 +28,9 @@
2019-12-13 16:17:17 +08:00
#endif /* MAKECRCH */
2022-12-30 04:28:25 +08:00
#include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
2019-12-13 16:17:17 +08:00
+#ifdef __aarch64__
+#include "arm_acle.h"
+#endif
2022-12-30 04:28:25 +08:00
/*
A CRC of a message is computed on N braids of words in the message, where
@@ -600,6 +603,49 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
2019-12-13 16:17:17 +08:00
return (const z_crc_t FAR *)crc_table;
}
+#ifdef __aarch64__
2019-12-13 16:17:17 +08:00
+/* CRC-32 of buf[0..len-1], continuing from crc, computed with the ACLE
+   __crc32{b,h,w,d} intrinsics.  A NULL buf yields 0, the initial CRC.
+   NOTE(review): the intrinsics need the CRC extension, not just __aarch64__. */
+ulg crc32_neon(crc, buf, len)
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    z_size_t len;
+{
+    uint32_t crc_result;
+
+    if (buf == NULL)
+        return 0;
+    crc_result = (uint32_t)(crc ^ 0xffffffffUL);
+
+    /* Consume single bytes until buf is 8-byte aligned, so that the wider
+       loads below never dereference a misaligned pointer. */
+    while (len != 0 && ((z_size_t)buf & (sizeof(uint64_t) - 1)) != 0) {
+        crc_result = __crc32b(crc_result, *buf++);
+        len--;
+    }
+
+    /* Bulk of the data, eight bytes per __crc32d call. */
+    for (; len >= sizeof(uint64_t); len -= sizeof(uint64_t)) {
+        crc_result = __crc32d(crc_result, *(const uint64_t *)(const void *)buf);
+        buf += sizeof(uint64_t);
+    }
+
+    /* At most seven bytes remain; every wider load below is naturally aligned. */
+    if (len & sizeof(uint32_t)) {
+        crc_result = __crc32w(crc_result, *(const uint32_t *)(const void *)buf);
+        buf += sizeof(uint32_t);
+    }
+    if (len & sizeof(uint16_t)) {
+        crc_result = __crc32h(crc_result, *(const uint16_t *)(const void *)buf);
+        buf += sizeof(uint16_t);
+    }
+    if (len & sizeof(uint8_t))
+        crc_result = __crc32b(crc_result, *buf);
+    return crc_result ^ 0xffffffffUL;
+}
+#endif
2019-12-13 16:17:17 +08:00
+
2022-12-30 04:28:25 +08:00
/* =========================================================================
* Use ARM machine instructions if available. This will compute the CRC about
* ten times faster than the braided calculation. This code does not check for
@@ -750,6 +794,10 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
const unsigned char FAR *buf;
z_size_t len;
{
2019-12-13 16:17:17 +08:00
+ #ifdef __aarch64__
+ return crc32_neon(crc, buf, len);
+ #endif
2022-12-30 04:28:25 +08:00
+
/* Return initial CRC, if requested. */
if (buf == Z_NULL) return 0;
2019-12-13 16:17:17 +08:00
--
2022-12-30 04:28:25 +08:00
2.27.0
2019-12-13 16:17:17 +08:00