2019-12-13 16:17:17 +08:00
|
|
|
From 8935175266e343ac1d52106e2e790810b54f26c1 Mon Sep 17 00:00:00 2001
|
|
|
|
|
From: liqiang64 <liqiang64@huawei.com>
|
|
|
|
|
Date: Tue, 3 Dec 2019 03:22:00 +0000
|
|
|
|
|
Subject: [PATCH] zlib: Optimize CRC32
|
|
|
|
|
|
|
|
|
|
This patch uses the ARMv8 CRC32 instructions to optimize the CRC32
|
|
|
|
|
algorithm.
|
|
|
|
|
|
2022-12-30 04:28:25 +08:00
|
|
|
On the ARM architecture, we can optimize the efficiency of
|
2019-12-13 16:17:17 +08:00
|
|
|
crc32 through the ACLE intrinsics for the ARMv8 CRC32 instruction
|
|
|
|
|
set.
|
|
|
|
|
Modified by Li Qiang.
|
|
|
|
|
---
|
2022-12-30 04:22:56 +00:00
|
|
|
crc32.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
|
|
1 file changed, 50 insertions(+)
|
2019-12-13 16:17:17 +08:00
|
|
|
|
|
|
|
|
diff --git a/crc32.c b/crc32.c
|
2022-12-30 04:28:25 +08:00
|
|
|
index f8357b0..5c53068 100644
|
2019-12-13 16:17:17 +08:00
|
|
|
--- a/crc32.c
|
|
|
|
|
+++ b/crc32.c
|
2022-12-30 04:28:25 +08:00
|
|
|
@@ -28,6 +28,9 @@
|
2019-12-13 16:17:17 +08:00
|
|
|
#endif /* MAKECRCH */
|
|
|
|
|
|
2022-12-30 04:28:25 +08:00
|
|
|
#include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
|
2019-12-13 16:17:17 +08:00
|
|
|
+#ifdef __aarch64__
|
|
|
|
|
+#include "arm_acle.h"
|
|
|
|
|
+#endif
|
|
|
|
|
|
2022-12-30 04:28:25 +08:00
|
|
|
/*
|
|
|
|
|
A CRC of a message is computed on N braids of words in the message, where
|
2022-12-30 04:22:56 +00:00
|
|
|
@@ -600,6 +603,49 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
|
2019-12-13 16:17:17 +08:00
|
|
|
return (const z_crc_t FAR *)crc_table;
|
|
|
|
|
}
|
2022-12-30 04:22:56 +00:00
|
|
|
|
|
|
|
|
+#ifdef __aarch64__
|
2019-12-13 16:17:17 +08:00
|
|
|
+/* CRC-32 of buf[0..len-1] using the ARMv8 CRC32 instructions through the ACLE
+ * __crc32{b,h,w,d} intrinsics (despite the name, no NEON is involved).  crc is
+ * the running CRC from a previous call and the updated CRC is returned; a NULL
+ * buf yields 0.  Wide loads are issued only once buf is 8-byte aligned, so no
+ * misaligned access occurs.  NOTE(review): __aarch64__ alone does not ensure
+ * the CRC extension is enabled -- confirm the build uses -march=armv8-a+crc. */
+ulg crc32_neon(crc, buf, len)
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    z_size_t len;
+{
+    uint32_t state;
+
+    if (buf == NULL)
+        return 0;
+    state = (uint32_t)(crc ^ 0xffffffffUL);     /* CRC-32 pre-conditioning */
+    /* Go bytewise until buf reaches an 8-byte boundary or input runs out. */
+    while (len != 0 && ((uintptr_t)buf & 7) != 0) {
+        state = __crc32b(state, *buf++);
+        len--;
+    }
+    /* Bulk of the input: eight aligned bytes per CRC instruction. */
+    for (; len >= sizeof(uint64_t); len -= sizeof(uint64_t)) {
+        state = __crc32d(state, *(const uint64_t *)(const void *)buf);
+        buf += sizeof(uint64_t);
+    }
+    /* At most 7 bytes remain.  Whenever any remain, they start on an 8-byte
+     * boundary, so the 4- and 2-byte loads below are aligned as well. */
+    if (len & sizeof(uint32_t)) {
+        state = __crc32w(state, *(const uint32_t *)(const void *)buf);
+        buf += sizeof(uint32_t);
+    }
+    if (len & sizeof(uint16_t)) {
+        state = __crc32h(state, *(const uint16_t *)(const void *)buf);
+        buf += sizeof(uint16_t);
+    }
+    if (len & sizeof(uint8_t))
+        state = __crc32b(state, *buf);
+    return state ^ 0xffffffffUL;                /* CRC-32 post-conditioning */
+}
|
2022-12-30 04:22:56 +00:00
|
|
|
+#endif
|
2019-12-13 16:17:17 +08:00
|
|
|
+
|
2022-12-30 04:28:25 +08:00
|
|
|
/* =========================================================================
|
|
|
|
|
* Use ARM machine instructions if available. This will compute the CRC about
|
|
|
|
|
* ten times faster than the braided calculation. This code does not check for
|
|
|
|
|
@@ -750,6 +794,10 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
|
2019-12-13 16:17:17 +08:00
|
|
|
const unsigned char FAR *buf;
|
|
|
|
|
z_size_t len;
|
|
|
|
|
{
|
|
|
|
|
+ #ifdef __aarch64__
|
|
|
|
|
+ return crc32_neon(crc, buf, len);
|
|
|
|
|
+ #endif
|
2022-12-30 04:28:25 +08:00
|
|
|
+
|
|
|
|
|
/* Return initial CRC, if requested. */
|
|
|
|
|
if (buf == Z_NULL) return 0;
|
2019-12-13 16:17:17 +08:00
|
|
|
|
|
|
|
|
--
|
2022-12-30 04:28:25 +08:00
|
|
|
2.27.0
|
2019-12-13 16:17:17 +08:00
|
|
|
|