From 8935175266e343ac1d52106e2e790810b54f26c1 Mon Sep 17 00:00:00 2001
From: liqiang64
Date: Tue, 3 Dec 2019 03:22:00 +0000
Subject: [PATCH] zlib: Optimize CRC32

This patch uses the ARMv8 CRC32 instructions to optimize the CRC32
algorithm. On the AArch64 architecture, we can improve the efficiency of
crc32 through the ACLE intrinsics (__crc32b/h/w/d) that expose these
instructions. The helper is still called crc32_neon, although the
instructions belong to the CRC32 extension rather than to NEON.

The fast path is compiled only when the compiler targets the CRC32
extension (__ARM_FEATURE_CRC32) on a little-endian AArch64 system; every
other configuration keeps the table-driven implementation unchanged.
Input words are loaded with memcpy() so that unaligned buffers are handled
without undefined behaviour.

Modify by Li Qiang.
---
 crc32.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/crc32.c b/crc32.c
index 9580440..79ebdbd 100644
--- a/crc32.c
+++ b/crc32.c
@@ -29,6 +29,18 @@
 #endif /* MAKECRCH */
 
 #include "zutil.h"      /* for STDC and FAR definitions */
+
+/* Use the ARMv8 CRC32 instructions through the ACLE intrinsics, but only when
+ * the compiler targets a CPU that has them (the extension is optional in
+ * ARMv8.0-A).  crc32_neon() loads multi-byte words in memory order, which
+ * matches what the instructions expect only on a little-endian target.
+ */
+#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && \
+    defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#  include <arm_acle.h>
+#  include <string.h>     /* memcpy() for alignment-safe loads */
+#  define Z_ARM_CRC32
+#endif
 
 /* Definitions for doing the crc four data bytes at a time. */
 #if !defined(NOBYFOUR) && defined(Z_U4)
@@ -194,6 +206,55 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
     return (const z_crc_t FAR *)crc_table;
 }
 
+#ifdef Z_ARM_CRC32
+/* =========================================================================
+ * Update the running CRC-32 `crc' with the `len' bytes at `buf' using the
+ * ARMv8 CRC32 instructions (not NEON, despite the name, which is kept so
+ * existing references still resolve).  Returns 0 when buf is Z_NULL, like
+ * crc32_z().
+ */
+ulg crc32_neon(crc, buf, len)
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    z_size_t len;
+{
+    uint32_t c;
+    uint64_t w8;
+    uint32_t w4;
+    uint16_t w2;
+
+    if (buf == Z_NULL) return 0UL;
+
+    c = (uint32_t)crc ^ 0xffffffffU;
+
+    /* memcpy() keeps the loads free of alignment and aliasing assumptions;
+       a fixed-size copy like this is normally compiled to a single load. */
+    while (len >= sizeof(w8)) {
+        memcpy(&w8, buf, sizeof(w8));
+        c = __crc32d(c, w8);
+        buf += sizeof(w8);
+        len -= sizeof(w8);
+    }
+
+    /* At most 7 bytes remain; bits 2, 1 and 0 of len pick the tail pieces. */
+    if (len & sizeof(w4)) {
+        memcpy(&w4, buf, sizeof(w4));
+        c = __crc32w(c, w4);
+        buf += sizeof(w4);
+    }
+    if (len & sizeof(w2)) {
+        memcpy(&w2, buf, sizeof(w2));
+        c = __crc32h(c, w2);
+        buf += sizeof(w2);
+    }
+    if (len & 1) {
+        c = __crc32b(c, *buf);
+    }
+
+    return (ulg)(c ^ 0xffffffffU);
+}
+#endif /* Z_ARM_CRC32 */
+
 /* ========================================================================= */
 #define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
 #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
@@ -204,6 +265,9 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
     const unsigned char FAR *buf;
     z_size_t len;
 {
+#ifdef Z_ARM_CRC32
+    return crc32_neon(crc, buf, len);
+#endif
     if (buf == Z_NULL) return 0UL;
 
 #ifdef DYNAMIC_CRC_TABLE
-- 
1.8.3.1