Package init
This commit is contained in:
parent
402e2395a6
commit
c1006eccfa
170
performance-neoncrc32-and-prfm.patch
Normal file
170
performance-neoncrc32-and-prfm.patch
Normal file
@ -0,0 +1,170 @@
|
||||
From 134712c35ed2ec5a06c61583dce59867aeb28862 Mon Sep 17 00:00:00 2001
|
||||
From: liqiang64 <liqiang64@huawei.com>
|
||||
Date: Mon, 11 Nov 2019 19:47:36 +0800
|
||||
Subject: [PATCH] performance-neoncrc32-and-prfm
|
||||
|
||||
Profiling gzip with the perf tool shows that crc32 and
|
||||
longest_match are the dominant hotspots.
|
||||
|
||||
On the ARM architecture, we can optimize the efficiency of
|
||||
crc32 through the ARMv8 CRC32 instructions (via the ACLE
|
||||
intrinsics), and optimize the performance of random access code through
|
||||
prefetch instructions.
|
||||
Modified by Li Qiang.
|
||||
---
|
||||
deflate.c | 27 ++++++++++++++++++++++++++-
|
||||
util.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 75 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/deflate.c b/deflate.c
|
||||
index 951d7af..f15a227 100644
|
||||
--- a/deflate.c
|
||||
+++ b/deflate.c
|
||||
@@ -392,6 +392,9 @@ longest_match(IPos cur_match)
|
||||
register int len; /* length of current match */
|
||||
int best_len = prev_length; /* best match length so far */
|
||||
IPos limit = strstart > (IPos)MAX_DIST ? strstart - (IPos)MAX_DIST : NIL;
|
||||
+ #ifdef __aarch64__
|
||||
+ IPos next_match;
|
||||
+ #endif
|
||||
/* Stop when cur_match becomes <= limit. To simplify the code,
|
||||
* we prevent matches with the string of window index 0.
|
||||
*/
|
||||
@@ -425,6 +428,10 @@ longest_match(IPos cur_match)
|
||||
do {
|
||||
Assert(cur_match < strstart, "no future");
|
||||
match = window + cur_match;
|
||||
+ #ifdef __aarch64__
|
||||
+ next_match = prev[cur_match & WMASK];
|
||||
+ __asm__("PRFM PLDL1STRM, [%0]"::"r"(&(prev[next_match & WMASK])));
|
||||
+ #endif
|
||||
|
||||
/* Skip to next match if the match length cannot increase
|
||||
* or if the match length is less than 2:
|
||||
@@ -502,8 +509,14 @@ longest_match(IPos cur_match)
|
||||
scan_end = scan[best_len];
|
||||
#endif
|
||||
}
|
||||
- } while ((cur_match = prev[cur_match & WMASK]) > limit
|
||||
+ }
|
||||
+ #ifdef __aarch64__
|
||||
+ while ((cur_match = next_match) > limit
|
||||
&& --chain_length != 0);
|
||||
+ #else
|
||||
+ while ((cur_match = prev[cur_match & WMASK]) > limit
|
||||
+ && --chain_length != 0);
|
||||
+ #endif
|
||||
|
||||
return best_len;
|
||||
}
|
||||
@@ -788,7 +801,19 @@ off_t deflate()
|
||||
lookahead -= prev_length-1;
|
||||
prev_length -= 2;
|
||||
RSYNC_ROLL(strstart, prev_length+1);
|
||||
+ while (prev_length >= 4) {
|
||||
+ prev_length -= 4;
|
||||
+ strstart++;
|
||||
+ INSERT_STRING(strstart, hash_head);
|
||||
+ strstart++;
|
||||
+ INSERT_STRING(strstart, hash_head);
|
||||
+ strstart++;
|
||||
+ INSERT_STRING(strstart, hash_head);
|
||||
+ strstart++;
|
||||
+ INSERT_STRING(strstart, hash_head);
|
||||
+ }
|
||||
do {
|
||||
+ if (prev_length == 0) break;
|
||||
strstart++;
|
||||
INSERT_STRING(strstart, hash_head);
|
||||
/* strstart never exceeds WSIZE-MAX_MATCH, so there are
|
||||
diff --git a/util.c b/util.c
|
||||
index bb5e9f3..d0b3cb0 100644
|
||||
--- a/util.c
|
||||
+++ b/util.c
|
||||
@@ -31,6 +31,9 @@
|
||||
#include "gzip.h"
|
||||
#include <dirname.h>
|
||||
#include <xalloc.h>
|
||||
+#ifdef __aarch64__
|
||||
+#include <arm_acle.h>
|
||||
+#endif
|
||||
|
||||
#ifndef CHAR_BIT
|
||||
# define CHAR_BIT 8
|
||||
@@ -41,6 +44,7 @@ static int write_buffer (int, voidp, unsigned int);
|
||||
/* ========================================================================
|
||||
* Table of CRC-32's of all single-byte values (made by makecrc.c)
|
||||
*/
|
||||
+#ifndef __aarch64__
|
||||
static const ulg crc_32_tab[] = {
|
||||
0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
|
||||
0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
|
||||
@@ -95,6 +99,7 @@ static const ulg crc_32_tab[] = {
|
||||
0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
|
||||
0x2d02ef8dL
|
||||
};
|
||||
+#endif
|
||||
|
||||
/* ===========================================================================
|
||||
* Copy input to output unchanged: zcat == cat with --force.
|
||||
@@ -129,6 +134,49 @@ ulg updcrc(s, n)
|
||||
uch *s; /* pointer to bytes to pump through */
|
||||
unsigned n; /* number of bytes in s[] */
|
||||
{
|
||||
+ #ifdef __aarch64__
|
||||
+ register ulg c;
|
||||
+ static ulg crc = (ulg)0xffffffffL;
|
||||
+ register const uint8_t *buf1;
|
||||
+ register const uint16_t *buf2;
|
||||
+ register const uint32_t *buf4;
|
||||
+ register const uint64_t *buf8;
|
||||
+ int64_t length = (int64_t)n;
|
||||
+ buf8 = (const uint64_t *)(const void *)s;
|
||||
+
|
||||
+ if (s == NULL) {
|
||||
+ c = 0xffffffffL;
|
||||
+ } else {
|
||||
+ c = crc;
|
||||
+
|
||||
+ while(length >= sizeof(uint64_t)) {
|
||||
+ c = __crc32d(c, *buf8++);
|
||||
+ length -= sizeof(uint64_t);
|
||||
+ }
|
||||
+
|
||||
+ buf4 = (const uint32_t *)(const void *)buf8;
|
||||
+ if (length >= sizeof(uint32_t)) {
|
||||
+ c = __crc32w(c, *buf4++);
|
||||
+ length -= sizeof(uint32_t);
|
||||
+ }
|
||||
+
|
||||
+ buf2 = (const uint16_t *)(const void *)buf4;
|
||||
+ if(length >= sizeof(uint16_t)) {
|
||||
+ c = __crc32h(c, *buf2++);
|
||||
+ length -= sizeof(uint16_t);
|
||||
+ }
|
||||
+
|
||||
+ buf1 = (const uint8_t *)(const void *)buf2;
|
||||
+ if (length >= sizeof(uint8_t)) {
|
||||
+ c = __crc32b(c, *buf1);
|
||||
+ length -= sizeof(uint8_t);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ crc = c;
|
||||
+
|
||||
+ return (c ^ 0xffffffffL);
|
||||
+#else
|
||||
register ulg c; /* temporary variable */
|
||||
|
||||
static ulg crc = (ulg)0xffffffffL; /* shift register contents */
|
||||
@@ -143,6 +191,7 @@ ulg updcrc(s, n)
|
||||
}
|
||||
crc = c;
|
||||
return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */
|
||||
+#endif
|
||||
}
|
||||
|
||||
/* ===========================================================================
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user