2021-12-28 11:50:48 +08:00
|
|
|
From 00f4c93e6c6cae92714a96cdde3c07bdfd59c9dc Mon Sep 17 00:00:00 2001
|
|
|
|
|
From: root <root@localhost.localdomain>
|
|
|
|
|
Date: Tue, 28 Dec 2021 10:24:26 +0800
|
|
|
|
|
Subject: [PATCH] aarch64: use ARMv8 CRC32 instructions and PRFM prefetch
|
2019-11-19 14:09:22 +08:00
|
|
|
|
|
|
|
|
---
|
2021-12-28 11:50:48 +08:00
|
|
|
deflate.c | 28 +++++++++++++++++++++++++++-
|
|
|
|
|
util.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
|
2019-11-19 14:09:22 +08:00
|
|
|
2 files changed, 75 insertions(+), 1 deletion(-)
|
|
|
|
|
|
|
|
|
|
diff --git a/deflate.c b/deflate.c
|
2021-12-28 11:50:48 +08:00
|
|
|
index 55bdc0e..3add924 100644
|
2019-11-19 14:09:22 +08:00
|
|
|
--- a/deflate.c
|
|
|
|
|
+++ b/deflate.c
|
2021-12-28 11:50:48 +08:00
|
|
|
@@ -378,6 +378,9 @@ longest_match(IPos cur_match)
|
2019-11-19 14:09:22 +08:00
|
|
|
register int len; /* length of current match */
|
|
|
|
|
int best_len = prev_length; /* best match length so far */
|
|
|
|
|
IPos limit = strstart > (IPos)MAX_DIST ? strstart - (IPos)MAX_DIST : NIL;
|
|
|
|
|
+ #ifdef __aarch64__
|
|
|
|
|
+ IPos next_match;
|
|
|
|
|
+ #endif
|
|
|
|
|
/* Stop when cur_match becomes <= limit. To simplify the code,
|
|
|
|
|
* we prevent matches with the string of window index 0.
|
|
|
|
|
*/
|
2021-12-28 11:50:48 +08:00
|
|
|
@@ -412,6 +415,11 @@ longest_match(IPos cur_match)
|
2019-11-19 14:09:22 +08:00
|
|
|
Assert(cur_match < strstart, "no future");
|
|
|
|
|
match = window + cur_match;
|
2021-12-28 11:50:48 +08:00
|
|
|
|
2019-11-19 14:09:22 +08:00
|
|
|
+ #ifdef __aarch64__
|
|
|
|
|
+ next_match = prev[cur_match & WMASK];
|
|
|
|
|
+ __asm__("PRFM PLDL1STRM, [%0]"::"r"(&(prev[next_match & WMASK])));
|
|
|
|
|
+ #endif
|
2021-12-28 11:50:48 +08:00
|
|
|
+
|
2019-11-19 14:09:22 +08:00
|
|
|
/* Skip to next match if the match length cannot increase
|
|
|
|
|
* or if the match length is less than 2:
|
2021-12-28 11:50:48 +08:00
|
|
|
*/
|
|
|
|
|
@@ -488,8 +496,14 @@ longest_match(IPos cur_match)
|
2019-11-19 14:09:22 +08:00
|
|
|
scan_end = scan[best_len];
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
- } while ((cur_match = prev[cur_match & WMASK]) > limit
|
|
|
|
|
+ }
|
|
|
|
|
+ #ifdef __aarch64__
|
|
|
|
|
+ while ((cur_match = next_match) > limit
|
2021-12-28 11:50:48 +08:00
|
|
|
+ && --chain_length != 0);
|
2019-11-19 14:09:22 +08:00
|
|
|
+ #else
|
|
|
|
|
+ while ((cur_match = prev[cur_match & WMASK]) > limit
|
2021-12-28 11:50:48 +08:00
|
|
|
&& --chain_length != 0);
|
2019-11-19 14:09:22 +08:00
|
|
|
+ #endif
|
|
|
|
|
|
|
|
|
|
return best_len;
|
|
|
|
|
}
|
2021-12-28 11:50:48 +08:00
|
|
|
@@ -777,7 +791,19 @@ deflate (int pack_level)
|
2019-11-19 14:09:22 +08:00
|
|
|
lookahead -= prev_length-1;
|
|
|
|
|
prev_length -= 2;
|
|
|
|
|
RSYNC_ROLL(strstart, prev_length+1);
|
|
|
|
|
+ while (prev_length >= 4) {
|
|
|
|
|
+ prev_length -= 4;
|
|
|
|
|
+ strstart++;
|
|
|
|
|
+ INSERT_STRING(strstart, hash_head);
|
|
|
|
|
+ strstart++;
|
|
|
|
|
+ INSERT_STRING(strstart, hash_head);
|
|
|
|
|
+ strstart++;
|
|
|
|
|
+ INSERT_STRING(strstart, hash_head);
|
|
|
|
|
+ strstart++;
|
|
|
|
|
+ INSERT_STRING(strstart, hash_head);
|
|
|
|
|
+ }
|
|
|
|
|
do {
|
|
|
|
|
+ if (prev_length == 0) break;
|
|
|
|
|
strstart++;
|
|
|
|
|
INSERT_STRING(strstart, hash_head);
|
|
|
|
|
/* strstart never exceeds WSIZE-MAX_MATCH, so there are
|
|
|
|
|
diff --git a/util.c b/util.c
|
2021-12-28 11:50:48 +08:00
|
|
|
index e4240a7..397fb3e 100644
|
2019-11-19 14:09:22 +08:00
|
|
|
--- a/util.c
|
|
|
|
|
+++ b/util.c
|
|
|
|
|
@@ -31,6 +31,9 @@
|
|
|
|
|
#include "gzip.h"
|
|
|
|
|
#include <dirname.h>
|
|
|
|
|
#include <xalloc.h>
|
|
|
|
|
+#ifdef __aarch64__
|
|
|
|
|
+#include <arm_acle.h>
|
|
|
|
|
+#endif
|
|
|
|
|
|
|
|
|
|
#ifndef CHAR_BIT
|
|
|
|
|
# define CHAR_BIT 8
|
|
|
|
|
@@ -41,6 +44,7 @@ static int write_buffer (int, voidp, unsigned int);
|
|
|
|
|
/* ========================================================================
|
|
|
|
|
* Table of CRC-32's of all single-byte values (made by makecrc.c)
|
|
|
|
|
*/
|
|
|
|
|
+#ifndef __aarch64__
|
|
|
|
|
static const ulg crc_32_tab[] = {
|
|
|
|
|
0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
|
|
|
|
|
0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
|
|
|
|
|
@@ -95,6 +99,7 @@ static const ulg crc_32_tab[] = {
|
|
|
|
|
0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
|
|
|
|
|
0x2d02ef8dL
|
|
|
|
|
};
|
|
|
|
|
+#endif
|
|
|
|
|
|
2021-12-28 11:50:48 +08:00
|
|
|
/* Shift register contents. */
|
|
|
|
|
static ulg crc = 0xffffffffL;
|
|
|
|
|
@@ -132,6 +137,48 @@ ulg updcrc(s, n)
|
|
|
|
|
const uch *s; /* pointer to bytes to pump through */
|
2019-11-19 14:09:22 +08:00
|
|
|
unsigned n; /* number of bytes in s[] */
|
|
|
|
|
{
|
|
|
|
|
+ #ifdef __aarch64__
|
|
|
|
|
+ register ulg c;
|
|
|
|
|
+ register const uint8_t *buf1;
|
|
|
|
|
+ register const uint16_t *buf2;
|
|
|
|
|
+ register const uint32_t *buf4;
|
|
|
|
|
+ register const uint64_t *buf8;
|
|
|
|
|
+ int64_t length = (int64_t)n;
|
|
|
|
|
+ buf8 = (const uint64_t *)(const void *)s;
|
|
|
|
|
+
|
|
|
|
|
+ if (s == NULL) {
|
|
|
|
|
+ c = 0xffffffffL;
|
|
|
|
|
+ } else {
|
2021-12-28 11:50:48 +08:00
|
|
|
+ c = crc;
|
2019-11-19 14:09:22 +08:00
|
|
|
+
|
|
|
|
|
+ while(length >= sizeof(uint64_t)) {
|
|
|
|
|
+ c = __crc32d(c, *buf8++);
|
|
|
|
|
+ length -= sizeof(uint64_t);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ buf4 = (const uint32_t *)(const void *)buf8;
|
|
|
|
|
+ if (length >= sizeof(uint32_t)) {
|
|
|
|
|
+ c = __crc32w(c, *buf4++);
|
|
|
|
|
+ length -= sizeof(uint32_t);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ buf2 = (const uint16_t *)(const void *)buf4;
|
|
|
|
|
+ if(length >= sizeof(uint16_t)) {
|
|
|
|
|
+ c = __crc32h(c, *buf2++);
|
|
|
|
|
+ length -= sizeof(uint16_t);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ buf1 = (const uint8_t *)(const void *)buf2;
|
|
|
|
|
+ if (length >= sizeof(uint8_t)) {
|
|
|
|
|
+ c = __crc32b(c, *buf1);
|
|
|
|
|
+ length -= sizeof(uint8_t);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ crc = c;
|
|
|
|
|
+
|
|
|
|
|
+ return (c ^ 0xffffffffL);
|
|
|
|
|
+#else
|
|
|
|
|
register ulg c; /* temporary variable */
|
|
|
|
|
|
2021-12-28 11:50:48 +08:00
|
|
|
if (s == NULL) {
|
|
|
|
|
@@ -144,6 +191,7 @@ ulg updcrc(s, n)
|
2019-11-19 14:09:22 +08:00
|
|
|
}
|
|
|
|
|
crc = c;
|
|
|
|
|
return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */
|
|
|
|
|
+#endif
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-28 11:50:48 +08:00
|
|
|
/* Return a current CRC value. */
|
2019-11-19 14:09:22 +08:00
|
|
|
--
|
2021-12-28 11:50:48 +08:00
|
|
|
2.27.0
|
2019-11-19 14:09:22 +08:00
|
|
|
|