diff --git a/0001-Neon-Optimized-hash-chain-rebase.patch b/0001-Neon-Optimized-hash-chain-rebase.patch deleted file mode 100644 index e6f909f..0000000 --- a/0001-Neon-Optimized-hash-chain-rebase.patch +++ /dev/null @@ -1,170 +0,0 @@ -From f0fd8c553fa024c599f4aff65d7c603ceeaa6a58 Mon Sep 17 00:00:00 2001 -From: Adenilson Cavalcanti -Date: Mon, 9 Apr 2018 13:52:17 -0700 -Subject: [PATCH 1/3] Neon-Optimized hash chain rebase - -This should help with compression of data, using NEON instructions -(therefore useful for ARMv7/ARMv8). - -Original patch by Jun He. ---- - CMakeLists.txt | 18 ++++++++ - contrib/arm/neon_slide_hash.h | 84 +++++++++++++++++++++++++++++++++++ - deflate.c | 7 +++ - 3 files changed, 109 insertions(+) - create mode 100644 contrib/arm/neon_slide_hash.h - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 0fe939d..e9a74e9 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -136,6 +136,24 @@ if(CMAKE_COMPILER_IS_GNUCC) - set(ZLIB_ASMS contrib/amd64/amd64-match.S) - endif () - -+ if(ARM_NEON) -+ list(REMOVE_ITEM ZLIB_SRCS inflate.c) -+ set(ZLIB_ARM_NEON_HDRS -+ contrib/arm/chunkcopy.h -+ contrib/arm/inffast_chunk.h -+ contrib/arm/neon_slide_hash.h) -+ set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c) -+ add_definitions(-DARM_NEON) -+ set(COMPILER ${CMAKE_C_COMPILER}) -+ # NEON is mandatory in ARMv8. -+ if(${COMPILER} MATCHES "aarch64") -+ set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -march=armv8-a) -+ # But it was optional for ARMv7. -+ elseif(${COMPILER} MATCHES "arm") -+ set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -mfpu=neon) -+ endif() -+ endif() -+ - if(ZLIB_ASMS) - add_definitions(-DASMV) - set_source_files_properties(${ZLIB_ASMS} PROPERTIES LANGUAGE C COMPILE_FLAGS -DNO_UNDERLINE) -diff --git a/contrib/arm/neon_slide_hash.h b/contrib/arm/neon_slide_hash.h -new file mode 100644 -index 0000000..0daffa1 ---- /dev/null -+++ b/contrib/arm/neon_slide_hash.h -@@ -0,0 +1,84 @@ -+/* Copyright (C) 1995-2011, 2016 Mark Adler -+ * Copyright (C) 2017 ARM Holdings Inc. -+ * Authors: Adenilson Cavalcanti -+ * Jun He -+ * This software is provided 'as-is', without any express or implied -+ * warranty. In no event will the authors be held liable for any damages -+ * arising from the use of this software. -+ * Permission is granted to anyone to use this software for any purpose, -+ * including commercial applications, and to alter it and redistribute it -+ * freely, subject to the following restrictions: -+ * 1. The origin of this software must not be misrepresented; you must not -+ * claim that you wrote the original software. If you use this software -+ * in a product, an acknowledgment in the product documentation would be -+ * appreciated but is not required. -+ * 2. Altered source versions must be plainly marked as such, and must not be -+ * misrepresented as being the original software. -+ * 3. This notice may not be removed or altered from any source distribution. -+ */ -+#ifndef __NEON_SLIDE_HASH__ -+#define __NEON_SLIDE_HASH__ -+ -+#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) -+#include "deflate.h" -+#include -+ -+inline static void neon_slide_hash(deflate_state *s) -+{ -+ /* -+ * This is ASIMD implementation for hash table rebase -+ * it assumes: -+ * 1. hash chain offset (Pos) is 2 bytes -+ * 2. 
hash table size is multiple*128 bytes -+ * #1 should be true as Pos is defined as "ush" -+ * #2 should be true as hash_bits are greater that 7 -+ */ -+ unsigned n, m; -+ unsigned short wsize = s->w_size; -+ uint16x8_t v, *p; -+ size_t size; -+ -+ size = s->hash_size*sizeof(s->head[0]); -+ Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err"); -+ -+ Assert(sizeof(Pos) == 2, "Wrong Pos size"); -+ -+ /* slide s->head */ -+ v = vdupq_n_u16(wsize); -+ p = (uint16x8_t *)(s->head); -+ n = size / (sizeof(uint16x8_t) * 8); -+ do { -+ p[0] = vqsubq_u16(p[0], v); -+ p[1] = vqsubq_u16(p[1], v); -+ p[2] = vqsubq_u16(p[2], v); -+ p[3] = vqsubq_u16(p[3], v); -+ p[4] = vqsubq_u16(p[4], v); -+ p[5] = vqsubq_u16(p[5], v); -+ p[6] = vqsubq_u16(p[6], v); -+ p[7] = vqsubq_u16(p[7], v); -+ p += 8; -+ } while (--n); -+#ifndef FASTEST -+ /* slide s->prev */ -+ size = wsize*sizeof(s->prev[0]); -+ -+ Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err"); -+ -+ p = (uint16x8_t *)(s->prev); -+ n = size / (sizeof(uint16x8_t) * 8); -+ do { -+ p[0] = vqsubq_u16(p[0], v); -+ p[1] = vqsubq_u16(p[1], v); -+ p[2] = vqsubq_u16(p[2], v); -+ p[3] = vqsubq_u16(p[3], v); -+ p[4] = vqsubq_u16(p[4], v); -+ p[5] = vqsubq_u16(p[5], v); -+ p[6] = vqsubq_u16(p[6], v); -+ p[7] = vqsubq_u16(p[7], v); -+ p += 8; -+ } while (--n); -+#endif -+} -+ -+#endif -+#endif -diff --git a/deflate.c b/deflate.c -index 1ec7614..36f99ac 100644 ---- a/deflate.c -+++ b/deflate.c -@@ -50,6 +50,9 @@ - /* @(#) $Id$ */ - - #include "deflate.h" -+#if __ARM_NEON -+#include "contrib/arm/neon_slide_hash.h" -+#endif - - const char deflate_copyright[] = - " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; -@@ -201,6 +204,9 @@ local const config configuration_table[10] = { - local void slide_hash(s) - deflate_state *s; - { -+#if ARM_NEON -+ return neon_slide_hash(s); -+#else - unsigned n, m; - Posf *p; - uInt wsize = s->w_size; -@@ -222,6 +228,7 @@ local void slide_hash(s) - */ - } while (--n); - #endif -+#endif - } - - /* ========================================================================= */ --- -2.19.0 - diff --git a/0002-Porting-optimized-longest_match.patch b/0002-Porting-optimized-longest_match.patch deleted file mode 100644 index 7fda12e..0000000 --- a/0002-Porting-optimized-longest_match.patch +++ /dev/null @@ -1,218 +0,0 @@ -From 17a154db6774a4acf347cfc5189eaf2cd675e696 Mon Sep 17 00:00:00 2001 -From: Adenilson Cavalcanti -Date: Mon, 9 Apr 2018 15:14:19 -0700 -Subject: [PATCH 2/3] Porting optimized longest_match - -This patch was contributed to zlib-ng and features an improved longest_match -function using the most distant hash code to reduce number of checks -(see: http://www.gildor.org/en/projects/zlib). - -Original patch by Jun He. 
----
- CMakeLists.txt | 3 +-
- contrib/arm/arm_longest_match.h | 142 ++++++++++++++++++++++++++++++++
- deflate.c | 11 ++-
- 3 files changed, 152 insertions(+), 4 deletions(-)
- create mode 100644 contrib/arm/arm_longest_match.h
-
-diff --git a/CMakeLists.txt b/CMakeLists.txt
-index e9a74e9..3826eba 100644
---- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -141,7 +141,8 @@ if(CMAKE_COMPILER_IS_GNUCC)
-       set(ZLIB_ARM_NEON_HDRS
-         contrib/arm/chunkcopy.h
-         contrib/arm/inffast_chunk.h
--        contrib/arm/neon_slide_hash.h)
-+        contrib/arm/neon_slide_hash.h
-+        contrib/arm/arm_longest_match.h)
-       set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
-       add_definitions(-DARM_NEON)
-       set(COMPILER ${CMAKE_C_COMPILER})
-diff --git a/contrib/arm/arm_longest_match.h b/contrib/arm/arm_longest_match.h
-new file mode 100644
-index 0000000..9e7083f
---- /dev/null
-+++ b/contrib/arm/arm_longest_match.h
-@@ -0,0 +1,142 @@
-+/* Copyright (C) 1995-2011, 2016 Mark Adler
-+ * Copyright (C) 2017 ARM Holdings Inc.
-+ * Authors: Adenilson Cavalcanti
-+ *          Jun He
-+ * This software is provided 'as-is', without any express or implied
-+ * warranty. In no event will the authors be held liable for any damages
-+ * arising from the use of this software.
-+ * Permission is granted to anyone to use this software for any purpose,
-+ * including commercial applications, and to alter it and redistribute it
-+ * freely, subject to the following restrictions:
-+ * 1. The origin of this software must not be misrepresented; you must not
-+ *    claim that you wrote the original software. If you use this software
-+ *    in a product, an acknowledgment in the product documentation would be
-+ *    appreciated but is not required.
-+ * 2. Altered source versions must be plainly marked as such, and must not be
-+ *    misrepresented as being the original software.
-+ * 3. This notice may not be removed or altered from any source distribution.
-+ */
-+#ifndef __ARM_LONGEST__MATCH__
-+#define __ARM_LONGEST__MATCH__
-+
-+#if defined(ARM_NEON)
-+#include "deflate.h"
-+#include <arm_neon.h>
-+static inline long get_match_len(const unsigned char *a, const unsigned char *b, long max)
-+{
-+    register int len = 0;
-+    register unsigned long xor = 0;
-+    register int check_loops = max/sizeof(unsigned long);
-+    while(check_loops-- > 0) {
-+        xor = (*(unsigned long *)(a+len)) ^ (*(unsigned long *)(b+len));
-+        if (xor) break;
-+        len += sizeof(unsigned long);
-+    }
-+    if (0 == xor) {
-+        while (len < max) {
-+            if (a[len] != b[len]) break;
-+            len++;
-+        }
-+        return len;
-+    }
-+    xor = __builtin_ctzl(xor)>>3;
-+    return len + xor;
-+}
-+
-+/*
-+ * This implementation is based on algorithm described at:
-+ * http://www.gildor.org/en/projects/zlib
-+ * It uses the hash chain indexed by the most distant hash code to
-+ * reduce number of checks.
-+ * This also eliminates the those unnecessary check loops in legacy
-+ * longest_match's do..while loop if the "most distant code" is out
-+ * of search buffer
-+ *
-+ */
-+static inline unsigned arm_longest_match(deflate_state *const s, IPos cur_match) {
-+    unsigned chain_length = s->max_chain_length;/* max hash chain length */
-+    unsigned char *scan = s->window + s->strstart; /* current string */
-+    unsigned char *match; /* matched string */
-+    unsigned int len; /* length of current match */
-+    unsigned int best_len = s->prev_length; /* best match length so far */
-+    unsigned int nice_match = s->nice_match; /* stop if match long enough */
-+    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
-+ s->strstart - (IPos)MAX_DIST(s) : 0; -+ /* Stop when cur_match becomes <= limit. To simplify the code, -+ * we prevent matches with the string of window index 0. -+ */ -+ int offset = 0; /* offset of the head[most_distant_hash] from IN cur_match */ -+ Pos *prev = s->prev; -+ unsigned int wmask = s->w_mask; -+ unsigned char *scan_buf_base = s->window; -+ -+ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. -+ * It is easy to get rid of this optimization if necessary. -+ */ -+ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); -+ -+ /* Do not look for matches beyond the end of the input. This is necessary -+ * to make deflate deterministic. -+ */ -+ if ((unsigned int)nice_match > s->lookahead) nice_match = s->lookahead; -+ -+ Assert((unsigned long)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); -+ -+ /* find most distant hash code for lazy_match */ -+ if (best_len > MIN_MATCH) { -+ /* search for most distant hash code */ -+ int i; -+ uint16_t hash = 0; -+ IPos pos; -+ -+ UPDATE_HASH(s, hash, scan[1]); -+ UPDATE_HASH(s, hash, scan[2]); -+ for (i = 3; i <= best_len; i++) { -+ UPDATE_HASH(s, hash, scan[i]); -+ /* get head IPos of hash calced by scan[i-2..i] */ -+ pos = s->head[hash]; -+ /* compare it to current "farthest hash" IPos */ -+ if (pos <= cur_match) { -+ /* we have a new "farthest hash" now */ -+ offset = i - 2; -+ cur_match = pos; -+ } -+ } -+ -+ /* update variables to correspond offset */ -+ limit += offset; -+ /* -+ * check if the most distant code's offset is out of search buffer -+ * if it is true, then this means scan[offset..offset+2] are not -+ * presented in the search buffer. So we just return best_len -+ * we've found. -+ */ -+ if (cur_match < limit) return best_len; -+ -+ scan_buf_base -= offset; -+ /* reduce hash search depth based on best_len */ -+ chain_length /= best_len - MIN_MATCH; -+ } -+ -+ do { -+ Assert(cur_match < s->strstart, "no future"); -+ -+ /* Determine matched length at current pos */ -+ match = scan_buf_base + cur_match; -+ len = get_match_len(match, scan, MAX_MATCH); -+ -+ if (len > best_len) { -+ /* found longer string */ -+ s->match_start = cur_match - offset; -+ best_len = len; -+ /* good enough? */ -+ if (len >= nice_match) break; -+ } -+ /* move to prev pos in this hash chain */ -+ } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0); -+ -+ return (best_len <= s->lookahead)? best_len : s->lookahead; -+} -+ -+#endif -+#endif -diff --git a/deflate.c b/deflate.c -index 36f99ac..4c42259 100644 ---- a/deflate.c -+++ b/deflate.c -@@ -50,9 +50,6 @@ - /* @(#) $Id$ */ - - #include "deflate.h" --#if __ARM_NEON --#include "contrib/arm/neon_slide_hash.h" --#endif - - const char deflate_copyright[] = - " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; -@@ -196,6 +193,11 @@ local const config configuration_table[10] = { - s->head[s->hash_size-1] = NIL; \ - zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); - -+#if defined(ARM_NEON) -+#include "contrib/arm/arm_longest_match.h" -+#include "contrib/arm/neon_slide_hash.h" -+#endif -+ - /* =========================================================================== - * Slide the hash table when sliding the window down (could be avoided with 32 - * bit values at the expense of memory usage). 
We slide even when level == 0 to -@@ -1244,6 +1246,9 @@ local uInt longest_match(s, cur_match) - deflate_state *s; - IPos cur_match; /* current match */ - { -+#if defined(ARM_NEON) -+ return arm_longest_match(s, cur_match); -+#endif - unsigned chain_length = s->max_chain_length;/* max hash chain length */ - register Bytef *scan = s->window + s->strstart; /* current string */ - register Bytef *match; /* matched string */ --- -2.19.0 - diff --git a/0003-arm64-specific-build-patch.patch b/0003-arm64-specific-build-patch.patch deleted file mode 100644 index b8b6b55..0000000 --- a/0003-arm64-specific-build-patch.patch +++ /dev/null @@ -1,115 +0,0 @@ -From e0be75f8dce27a4e32196529df2a08dca791a286 Mon Sep 17 00:00:00 2001 -From: Jeremy Linton -Date: Fri, 6 Apr 2018 11:46:42 -0500 -Subject: [PATCH 3/3] arm64 specific build patch - ---- - Makefile.in | 19 ++++++++++++------- - configure | 2 +- - contrib/minizip/zip.c | 6 ++++-- - 3 files changed, 17 insertions(+), 10 deletions(-) - -diff --git a/Makefile.in b/Makefile.in -index 5a77949..9f088e5 100644 ---- a/Makefile.in -+++ b/Makefile.in -@@ -57,7 +57,7 @@ SRCDIR= - ZINC= - ZINCOUT=-I. - --OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inflate.o inftrees.o trees.o zutil.o -+OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inffast.o inflate.o inftrees.o trees.o zutil.o - OBJG = compress.o uncompr.o gzclose.o gzlib.o gzread.o gzwrite.o - OBJC = $(OBJZ) $(OBJG) - -@@ -163,16 +163,16 @@ crc32.o: $(SRCDIR)crc32.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c - - deflate.o: $(SRCDIR)deflate.c -- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c -+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)deflate.c - - infback.o: $(SRCDIR)infback.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)infback.c - - inffast.o: $(SRCDIR)inffast.c -- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inffast.c -+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)inffast.c - - inflate.o: $(SRCDIR)inflate.c -- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inflate.c -+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)inflate.c - - inftrees.o: $(SRCDIR)inftrees.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inftrees.c -@@ -214,7 +214,7 @@ crc32.lo: $(SRCDIR)crc32.c - - deflate.lo: $(SRCDIR)deflate.c - -@mkdir objs 2>/dev/null || test -d objs -- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c -+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c - -@mv objs/deflate.o $@ - - infback.lo: $(SRCDIR)infback.c -@@ -222,14 +222,19 @@ infback.lo: $(SRCDIR)infback.c - $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/infback.o $(SRCDIR)infback.c - -@mv objs/infback.o $@ - -+arminffast.lo: $(SRCDIR)contrib/arm/inffast_chunk.c $(SRCDIR)inffast.c -+ -@mkdir objs 2>/dev/null || test -d objs -+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/arminffast.o $(SRCDIR)contrib/arm/inffast_chunk.c -+ -@mv objs/arminffast.o $@ -+ - inffast.lo: $(SRCDIR)inffast.c - -@mkdir objs 2>/dev/null || test -d objs -- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c -+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c - -@mv objs/inffast.o $@ - - inflate.lo: $(SRCDIR)inflate.c - -@mkdir objs 2>/dev/null || test -d objs -- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c -+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm 
-DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c - -@mv objs/inflate.o $@ - - inftrees.lo: $(SRCDIR)inftrees.c -diff --git a/configure b/configure -index e974d1f..0c5f837 100755 ---- a/configure -+++ b/configure -@@ -23,7 +23,7 @@ SRCDIR=`dirname $0` - if test $SRCDIR = "."; then - ZINC="" - ZINCOUT="-I." -- SRCDIR="" -+ SRCDIR="./" - else - ZINC='-include zconf.h' - ZINCOUT='-I. -I$(SRCDIR)' -diff --git a/contrib/minizip/zip.c b/contrib/minizip/zip.c -index 44e88a9..0517930 100644 ---- a/contrib/minizip/zip.c -+++ b/contrib/minizip/zip.c -@@ -519,15 +519,17 @@ local ZPOS64_T zip64local_SearchCentralDir(const zlib_filefunc64_32_def* pzlib_f - break; - - for (i=(int)uReadSize-3; (i--)>0;) -+ { - if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) && - ((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06)) - { - uPosFound = uReadPos+i; - break; - } -+ } - -- if (uPosFound!=0) -- break; -+ if (uPosFound!=0) -+ break; - } - TRYFREE(buf); - return uPosFound; --- -2.19.0 - diff --git a/0005-Accelerate-Adler32-using-arm64-SVE-instructions.patch b/Accelerate-Adler32-using-arm64-SVE-instructions.patch similarity index 100% rename from 0005-Accelerate-Adler32-using-arm64-SVE-instructions.patch rename to Accelerate-Adler32-using-arm64-SVE-instructions.patch diff --git a/backport-0001-CVE-2018-25032.patch b/backport-0001-CVE-2018-25032.patch deleted file mode 100644 index 74eb73f..0000000 --- a/backport-0001-CVE-2018-25032.patch +++ /dev/null @@ -1,346 +0,0 @@ -From 5c44459c3b28a9bd3283aaceab7c615f8020c531 Mon Sep 17 00:00:00 2001 -From: Mark Adler -Date: Tue, 17 Apr 2018 22:09:22 -0700 -Subject: [PATCH] Fix a bug that can crash deflate on some input when using - Z_FIXED. - -This bug was reported by Danilo Ramos of Eideticom, Inc. It has -lain in wait 13 years before being found! The bug was introduced -in zlib 1.2.2.2, with the addition of the Z_FIXED option. That -option forces the use of fixed Huffman codes. For rare inputs with -a large number of distant matches, the pending buffer into which -the compressed data is written can overwrite the distance symbol -table which it overlays. That results in corrupted output due to -invalid distances, and can result in out-of-bound accesses, -crashing the application. - -The fix here combines the distance buffer and literal/length -buffers into a single symbol buffer. Now three bytes of pending -buffer space are opened up for each literal or length/distance -pair consumed, instead of the previous two bytes. This assures -that the pending buffer cannot overwrite the symbol table, since -the maximum fixed code compressed length/distance is 31 bits, and -since there are four bytes of pending space for every three bytes -of symbol space. - ---- - deflate.c | 74 ++++++++++++++++++++++++++++++++++++++++--------------- - deflate.h | 25 +++++++++---------- - trees.c | 49 +++++++++++------------------------- - 3 files changed, 79 insertions(+), 69 deletions(-) - -diff --git a/deflate.c b/deflate.c -index ef981b0..3db6d75 100644 ---- a/deflate.c -+++ b/deflate.c -@@ -269,11 +269,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, - int wrap = 1; - static const char my_version[] = ZLIB_VERSION; - -- ushf *overlay; -- /* We overlay pending_buf and d_buf+l_buf. This works since the average -- * output size for (length,distance) codes is <= 24 bits. 
-- */ -- - if (version == Z_NULL || version[0] != my_version[0] || - stream_size != sizeof(z_stream)) { - return Z_VERSION_ERROR; -@@ -344,9 +339,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, - - s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ - -- overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); -- s->pending_buf = (uchf *) overlay; -- s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); -+ /* We overlay pending_buf and sym_buf. This works since the average size -+ * for length/distance pairs over any compressed block is assured to be 31 -+ * bits or less. -+ * -+ * Analysis: The longest fixed codes are a length code of 8 bits plus 5 -+ * extra bits, for lengths 131 to 257. The longest fixed distance codes are -+ * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest -+ * possible fixed-codes length/distance pair is then 31 bits total. -+ * -+ * sym_buf starts one-fourth of the way into pending_buf. So there are -+ * three bytes in sym_buf for every four bytes in pending_buf. Each symbol -+ * in sym_buf is three bytes -- two for the distance and one for the -+ * literal/length. As each symbol is consumed, the pointer to the next -+ * sym_buf value to read moves forward three bytes. From that symbol, up to -+ * 31 bits are written to pending_buf. The closest the written pending_buf -+ * bits gets to the next sym_buf symbol to read is just before the last -+ * code is written. At that time, 31*(n-2) bits have been written, just -+ * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at -+ * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1 -+ * symbols are written.) The closest the writing gets to what is unread is -+ * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and -+ * can range from 128 to 32768. -+ * -+ * Therefore, at a minimum, there are 142 bits of space between what is -+ * written and what is read in the overlain buffers, so the symbols cannot -+ * be overwritten by the compressed data. That space is actually 139 bits, -+ * due to the three-bit fixed-code block header. -+ * -+ * That covers the case where either Z_FIXED is specified, forcing fixed -+ * codes, or when the use of fixed codes is chosen, because that choice -+ * results in a smaller compressed block than dynamic codes. That latter -+ * condition then assures that the above analysis also covers all dynamic -+ * blocks. A dynamic-code block will only be chosen to be emitted if it has -+ * fewer bits than a fixed-code block would for the same set of symbols. -+ * Therefore its average symbol length is assured to be less than 31. So -+ * the compressed data for a dynamic block also cannot overwrite the -+ * symbols from which it is being constructed. 
-+ */ -+ -+ s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); -+ s->pending_buf_size = (ulg)s->lit_bufsize * 4; - - if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || - s->pending_buf == Z_NULL) { -@@ -355,8 +388,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, - deflateEnd (strm); - return Z_MEM_ERROR; - } -- s->d_buf = overlay + s->lit_bufsize/sizeof(ush); -- s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; -+ s->sym_buf = s->pending_buf + s->lit_bufsize; -+ s->sym_end = (s->lit_bufsize - 1) * 3; -+ /* We avoid equality with lit_bufsize*3 because of wraparound at 64K -+ * on 16 bit machines and because stored blocks are restricted to -+ * 64K-1 bytes. -+ */ - - s->level = level; - s->strategy = strategy; -@@ -567,7 +604,7 @@ int ZEXPORT deflatePrime (strm, bits, value) - - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - s = strm->state; -- if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) -+ if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) - return Z_BUF_ERROR; - do { - put = Buf_size - s->bi_valid; -@@ -1126,7 +1163,6 @@ int ZEXPORT deflateCopy (dest, source) - #else - deflate_state *ds; - deflate_state *ss; -- ushf *overlay; - - - if (deflateStateCheck(source) || dest == Z_NULL) { -@@ -1146,8 +1182,7 @@ int ZEXPORT deflateCopy (dest, source) - ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); - ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); - ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); -- overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); -- ds->pending_buf = (uchf *) overlay; -+ ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4); - - if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || - ds->pending_buf == Z_NULL) { -@@ -1161,8 +1196,7 @@ int ZEXPORT deflateCopy (dest, source) - zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); - - ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); -- ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); -- ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; -+ ds->sym_buf = ds->pending_buf + ds->lit_bufsize; - - ds->l_desc.dyn_tree = ds->dyn_ltree; - ds->d_desc.dyn_tree = ds->dyn_dtree; -@@ -1934,7 +1968,7 @@ local block_state deflate_fast(s, flush) - FLUSH_BLOCK(s, 1); - return finish_done; - } -- if (s->last_lit) -+ if (s->sym_next) - FLUSH_BLOCK(s, 0); - return block_done; - } -@@ -2065,7 +2099,7 @@ local block_state deflate_slow(s, flush) - FLUSH_BLOCK(s, 1); - return finish_done; - } -- if (s->last_lit) -+ if (s->sym_next) - FLUSH_BLOCK(s, 0); - return block_done; - } -@@ -2140,7 +2174,7 @@ local block_state deflate_rle(s, flush) - FLUSH_BLOCK(s, 1); - return finish_done; - } -- if (s->last_lit) -+ if (s->sym_next) - FLUSH_BLOCK(s, 0); - return block_done; - } -@@ -2179,7 +2213,7 @@ local block_state deflate_huff(s, flush) - FLUSH_BLOCK(s, 1); - return finish_done; - } -- if (s->last_lit) -+ if (s->sym_next) - FLUSH_BLOCK(s, 0); - return block_done; - } -diff --git a/deflate.h b/deflate.h -index 23ecdd3..d4cf1a9 100644 ---- a/deflate.h -+++ b/deflate.h -@@ -217,7 +217,7 @@ typedef struct internal_state { - /* Depth of each subtree used as tie breaker for trees of equal frequency - */ - -- uchf *l_buf; /* buffer for literals or lengths */ -+ uchf *sym_buf; /* buffer for distances and literals/lengths */ - - uInt lit_bufsize; - /* Size of match buffer for literals/lengths. 
There are 4 reasons for -@@ -239,13 +239,8 @@ typedef struct internal_state { - * - I can't count above 4 - */ - -- uInt last_lit; /* running index in l_buf */ -- -- ushf *d_buf; -- /* Buffer for distances. To simplify the code, d_buf and l_buf have -- * the same number of elements. To use different lengths, an extra flag -- * array would be necessary. -- */ -+ uInt sym_next; /* running index in sym_buf */ -+ uInt sym_end; /* symbol table full when sym_next reaches this */ - - ulg opt_len; /* bit length of current block with optimal trees */ - ulg static_len; /* bit length of current block with static trees */ -@@ -325,20 +320,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, - - # define _tr_tally_lit(s, c, flush) \ - { uch cc = (c); \ -- s->d_buf[s->last_lit] = 0; \ -- s->l_buf[s->last_lit++] = cc; \ -+ s->sym_buf[s->sym_next++] = 0; \ -+ s->sym_buf[s->sym_next++] = 0; \ -+ s->sym_buf[s->sym_next++] = cc; \ - s->dyn_ltree[cc].Freq++; \ -- flush = (s->last_lit == s->lit_bufsize-1); \ -+ flush = (s->sym_next == s->sym_end); \ - } - # define _tr_tally_dist(s, distance, length, flush) \ - { uch len = (uch)(length); \ - ush dist = (ush)(distance); \ -- s->d_buf[s->last_lit] = dist; \ -- s->l_buf[s->last_lit++] = len; \ -+ s->sym_buf[s->sym_next++] = dist; \ -+ s->sym_buf[s->sym_next++] = dist >> 8; \ -+ s->sym_buf[s->sym_next++] = len; \ - dist--; \ - s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ - s->dyn_dtree[d_code(dist)].Freq++; \ -- flush = (s->last_lit == s->lit_bufsize-1); \ -+ flush = (s->sym_next == s->sym_end); \ - } - #else - # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) -diff --git a/trees.c b/trees.c -index 50cf4b4..8bf5c80 100644 ---- a/trees.c -+++ b/trees.c -@@ -416,7 +416,7 @@ local void init_block(s) - - s->dyn_ltree[END_BLOCK].Freq = 1; - s->opt_len = s->static_len = 0L; -- s->last_lit = s->matches = 0; -+ s->sym_next = s->matches = 0; - } - - #define SMALLEST 1 -@@ -947,7 +947,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) - - Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", - opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, -- s->last_lit)); -+ s->sym_next / 3)); - - if (static_lenb <= opt_lenb) opt_lenb = static_lenb; - -@@ -1016,8 +1016,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) - unsigned dist; /* distance of matched string */ - unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ - { -- s->d_buf[s->last_lit] = (ush)dist; -- s->l_buf[s->last_lit++] = (uch)lc; -+ s->sym_buf[s->sym_next++] = dist; -+ s->sym_buf[s->sym_next++] = dist >> 8; -+ s->sym_buf[s->sym_next++] = lc; - if (dist == 0) { - /* lc is the unmatched char */ - s->dyn_ltree[lc].Freq++; -@@ -1033,29 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) - s->dyn_dtree[d_code(dist)].Freq++; - } - --#ifdef TRUNCATE_BLOCK -- /* Try to guess if it is profitable to stop the current block here */ -- if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { -- /* Compute an upper bound for the compressed length */ -- ulg out_length = (ulg)s->last_lit*8L; -- ulg in_length = (ulg)((long)s->strstart - s->block_start); -- int dcode; -- for (dcode = 0; dcode < D_CODES; dcode++) { -- out_length += (ulg)s->dyn_dtree[dcode].Freq * -- (5L+extra_dbits[dcode]); -- } -- out_length >>= 3; -- Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", -- s->last_lit, in_length, out_length, -- 100L - out_length*100L/in_length)); -- if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; -- } --#endif -- 
--    return (s->last_lit == s->lit_bufsize-1);
--    /* We avoid equality with lit_bufsize because of wraparound at 64K
--     * on 16 bit machines and because stored blocks are restricted to
--     * 64K-1 bytes.
--     */
-+    return (s->sym_next == s->sym_end);
- }
-
- /* ===========================================================================
-@@ -1068,13 +1047,14 @@ local void compress_block(s, ltree, dtree)
- {
-     unsigned dist;      /* distance of matched string */
-     int lc;             /* match length or unmatched char (if dist == 0) */
--    unsigned lx = 0;    /* running index in l_buf */
-+    unsigned sx = 0;    /* running index in sym_buf */
-     unsigned code;      /* the code to send */
-     int extra;          /* number of extra bits to send */
-
--    if (s->last_lit != 0) do {
--        dist = s->d_buf[lx];
--        lc = s->l_buf[lx++];
-+    if (s->sym_next != 0) do {
-+        dist = s->sym_buf[sx++] & 0xff;
-+        dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
-+        lc = s->sym_buf[sx++];
-         if (dist == 0) {
-             send_code(s, lc, ltree); /* send a literal byte */
-             Tracecv(isgraph(lc), (stderr," '%c' ", lc));
-@@ -1099,11 +1079,10 @@ local void compress_block(s, ltree, dtree)
-         }
-     } /* literal or match pair ? */
-
--        /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
--        Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
--               "pendingBuf overflow");
-+        /* Check that the overlay between pending_buf and sym_buf is ok: */
-+        Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
-
--    } while (lx < s->last_lit);
-+    } while (sx < s->sym_next);
-
-     send_code(s, END_BLOCK, ltree);
- }
---
-2.27.0
-
diff --git a/backport-0001-CVE-2022-37434.patch b/backport-0001-CVE-2022-37434.patch
deleted file mode 100644
index 59d75a6..0000000
--- a/backport-0001-CVE-2022-37434.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-From eff308af425b67093bab25f80f1ae950166bece1 Mon Sep 17 00:00:00 2001
-From: Mark Adler <madler@alumni.caltech.edu>
-Date: Sat, 30 Jul 2022 15:51:11 -0700
-Subject: [PATCH] Fix a bug when getting a gzip header extra field with
- inflate().
-
-If the extra field was larger than the space the user provided with
-inflateGetHeader(), and if multiple calls of inflate() delivered
-the extra header data, then there could be a buffer overflow of the
-provided space. This commit assures that provided space is not
-exceeded.
----
- inflate.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/inflate.c b/inflate.c
-index 2a0ac30..95a38f5 100644
---- a/inflate.c
-+++ b/inflate.c
-@@ -765,9 +765,10 @@ int flush;
-                 copy = state->length;
-                 if (copy > have) copy = have;
-                 if (copy) {
-+                    len = state->head->extra_len - state->length;
-                     if (state->head != Z_NULL &&
--                        state->head->extra != Z_NULL) {
--                        len = state->head->extra_len - state->length;
-+                        state->head->extra != Z_NULL &&
-+                        len < state->head->extra_max) {
-                         zmemcpy(state->head->extra + len, next,
-                                 len + copy > state->head->extra_max ?
-                                 state->head->extra_max - len : copy);
---
-2.27.0
-
diff --git a/backport-0002-CVE-2018-25032.patch b/backport-0002-CVE-2018-25032.patch
deleted file mode 100644
index 20b69d0..0000000
--- a/backport-0002-CVE-2018-25032.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 4346a16853e19b45787ce933666026903fb8f3f8 Mon Sep 17 00:00:00 2001
-From: Mark Adler <madler@alumni.caltech.edu>
-Date: Tue, 17 Apr 2018 22:44:41 -0700
-Subject: [PATCH] Assure that the number of bits for deflatePrime() is valid.
-
----
- deflate.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/deflate.c b/deflate.c
-index 3db6d75..a4b0977 100644
---- a/deflate.c
-+++ b/deflate.c
-@@ -604,7 +604,8 @@ int ZEXPORT deflatePrime (strm, bits, value)
-
-     if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
-     s = strm->state;
--    if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
-+    if (bits < 0 || bits > 16 ||
-+        s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
-         return Z_BUF_ERROR;
-     do {
-         put = Buf_size - s->bi_valid;
---
-2.27.0
-
diff --git a/backport-0002-CVE-2022-37434.patch b/backport-0002-CVE-2022-37434.patch
deleted file mode 100644
index a115fce..0000000
--- a/backport-0002-CVE-2022-37434.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From 1eb7682f845ac9e9bf9ae35bbfb3bad5dacbd91d Mon Sep 17 00:00:00 2001
-From: Mark Adler <madler@alumni.caltech.edu>
-Date: Mon, 8 Aug 2022 10:50:09 -0700
-Subject: [PATCH] Fix extra field processing bug that dereferences NULL
- state->head.
-
-The recent commit to fix a gzip header extra field processing bug
-introduced the new bug fixed here.
----
- inflate.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/inflate.c b/inflate.c
-index 95a38f5..9c5934e 100644
---- a/inflate.c
-+++ b/inflate.c
-@@ -765,10 +765,10 @@ int flush;
-                 copy = state->length;
-                 if (copy > have) copy = have;
-                 if (copy) {
--                    len = state->head->extra_len - state->length;
-                     if (state->head != Z_NULL &&
-                         state->head->extra != Z_NULL &&
--                        len < state->head->extra_max) {
-+                        (len = state->head->extra_len - state->length) <
-+                            state->head->extra_max) {
-                         zmemcpy(state->head->extra + len, next,
-                                 len + copy > state->head->extra_max ?
-                                 state->head->extra_max - len : copy);
---
-2.27.0
-
diff --git a/fix-undefined-buffer-detected-by-oss-fuzz.patch b/backport-fix-undefined-buffer-detected-by-oss-fuzz.patch
similarity index 100%
rename from fix-undefined-buffer-detected-by-oss-fuzz.patch
rename to backport-fix-undefined-buffer-detected-by-oss-fuzz.patch
diff --git a/zlib-1.2.5-minizip-fixuncrypt.patch b/backport-zlib-1.2.5-minizip-fixuncrypt.patch
similarity index 100%
rename from zlib-1.2.5-minizip-fixuncrypt.patch
rename to backport-zlib-1.2.5-minizip-fixuncrypt.patch
diff --git a/zlib-1.2.11-SIMD.patch b/zlib-1.2.11-SIMD.patch
deleted file mode 100644
index d9f251b..0000000
--- a/zlib-1.2.11-SIMD.patch
+++ /dev/null
@@ -1,743 +0,0 @@
-diff --git a/deflate.c b/deflate.c
-index f30f71b..c018064 100644
---- a/deflate.c
-+++ b/deflate.c
-@@ -184,8 +184,16 @@ local const config configuration_table[10] = {
- * characters, so that a running hash key can be computed from the previous
- * key instead of complete recalculation each time.
- */ --#define UPDATE_HASH(s,h,c) (h = (((h)<hash_shift) ^ (c)) & s->hash_mask) -+#if defined(HASH_ARMV8_CRC32) -+#include -+#define UPDATE_HASH_CRC_INTERNAL(s, h, c) \ -+ (h = __crc32w(0, (c) & 0xFFFFFF) & ((deflate_state *)s)->hash_mask) - -+#define UPDATE_HASH(s, h, c) \ -+ UPDATE_HASH_CRC_INTERNAL(s, h, *(unsigned *)((uintptr_t)(&c) - (MIN_MATCH-1))) -+#else -+#define UPDATE_HASH(s,h,c) (h = (((h)<hash_shift) ^ (c)) & s->hash_mask) -+#endif - - /* =========================================================================== - * Insert string str in the dictionary and set match_head to the previous head -@@ -1198,14 +1247,15 @@ local unsigned read_buf(strm, buf, size) - strm->avail_in -= len; - - zmemcpy(buf, strm->next_in, len); -- if (strm->state->wrap == 1) { -- strm->adler = adler32(strm->adler, buf, len); -- } - #ifdef GZIP -- else if (strm->state->wrap == 2) { -+ if (strm->state->wrap == 2) { /* use crc32 algo */ - strm->adler = crc32(strm->adler, buf, len); -- } -+ } else - #endif -+ if (strm->state->wrap == 1) { -+ strm->adler = adler32(strm->adler, buf, len); -+ } -+ - strm->next_in += len; - strm->total_in += len; - -diff --git a/inffast.c b/inffast.c -index 4bfc995..2084739 100644 ---- a/inffast.c -+++ b/inffast.c -@@ -81,6 +81,9 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ - unsigned char FAR *out; /* local strm->next_out */ - unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ - unsigned char FAR *end; /* while out < end, enough space available */ -+#if defined(INFLATE_CHUNK_SIMD_NEON) -+ unsigned char FAR *limit; /* safety limit for chunky copies */ -+#endif - #ifdef INFLATE_STRICT - unsigned dmax; /* maximum distance from zlib header */ - #endif -@@ -113,7 +116,12 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ - #endif - wsize = state->wsize; - whave = state->whave; -+#if defined(INFLATE_CHUNK_SIMD_NEON) -+ limit = out + strm->avail_out; -+ wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext; -+#else - wnext = state->wnext; -+#endif - window = state->window; - hold = state->hold; - bits = state->bits; -@@ -221,6 +229,45 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ - #endif - } - from = window; -+#if defined(INFLATE_CHUNK_SIMD_NEON) -+ if (wnext >= op) { /* contiguous in window */ -+ from += wnext - op; -+ } -+ else { /* wrap around window */ -+ op -= wnext; -+ from += wsize - op; -+ if (op < len) { /* some from end of window */ -+ len -= op; -+ out = chunkcopy_safe(out, from, op, limit); -+ from = window; /* more from start of window */ -+ op = wnext; -+ /* This (rare) case can create a situation where -+ the first chunkcopy below must be checked. -+ */ -+ } -+ } -+ if (op < len) { /* still need some from output */ -+ out = chunkcopy_safe(out, from, op, limit); -+ len -= op; -+ /* When dist is small the amount of data that can be -+ copied from the window is also small, and progress -+ towards the dangerous end of the output buffer is -+ also small. This means that for trivial memsets and -+ for chunkunroll_relaxed() a safety check is -+ unnecessary. However, these conditions may not be -+ entered at all, and in that case it's possible that -+ the main copy is near the end. 
-+ */ -+ out = chunkunroll_relaxed(out, &dist, &len); -+ out = chunkcopy_safe(out, out - dist, len, limit); -+ } -+ else { -+ /* from points to window, so there is no risk of -+ overlapping pointers requiring memset-like behaviour -+ */ -+ out = chunkcopy_safe(out, from, len, limit); -+ } -+#else - if (wnext == 0) { /* very common case */ - from += wsize - op; - if (op < len) { /* some from window */ -@@ -271,8 +318,18 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ - if (len > 1) - *out++ = *from++; - } -+#endif - } -- else { -+ else { -+#if defined(INFLATE_CHUNK_SIMD_NEON) -+ /* Whole reference is in range of current output. No -+ range checks are necessary because we start with room -+ for at least 258 bytes of output, so unroll and roundoff -+ operations can write beyond `out+len` so long as they -+ stay within 258 bytes of `out`. -+ */ -+ out = chunkcopy_lapped_relaxed(out, dist, len); -+#else - from = out - dist; /* copy direct from output */ - do { /* minimum length is three */ - *out++ = *from++; -@@ -284,7 +341,8 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ - *out++ = *from++; - if (len > 1) - *out++ = *from++; -- } -+ } -+#endif - } - } - else if ((op & 64) == 0) { /* 2nd level distance code */ -diff --git a/inffast.h b/inffast.h -index b8da8bb..0def2e3 100644 ---- a/inffast.h -+++ b/inffast.h -@@ -32,4 +32,374 @@ - subject to change. Applications should only use zlib.h. - */ - -+/* -+ * The chunk-copy code below deals with writing the decoded DEFLATE data to -+ * the output with SIMD methods to increase decode speed. Reading the input -+ * to the DEFLATE decoder with a wide, SIMD method can also increase decode -+ * speed. This option is supported on little endian machines, and reads the -+ * input data in 64-bit (8 byte) chunks. -+ */ -+ - void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); -+ -+#if defined(INFLATE_CHUNK_SIMD_NEON) -+ -+#include -+#include "zutil.h" -+#include -+ -+typedef uint8x16_t z_vec128i_t; -+ -+#define Z_STATIC_ASSERT(name, assert) typedef char name[(assert) ? 1 : -1] -+ -+#if __STDC_VERSION__ >= 199901L -+#define Z_RESTRICT restrict -+#else -+#define Z_RESTRICT -+#endif -+ -+#if defined(__clang__) || defined(__GNUC__) || defined(__llvm__) -+#define Z_BUILTIN_MEMCPY __builtin_memcpy -+#else -+#define Z_BUILTIN_MEMCPY zmemcpy -+#endif -+ -+/* -+ * chunk copy type: the z_vec128i_t type size should be exactly 128-bits -+ * and equal to CHUNKCOPY_CHUNK_SIZE. -+ */ -+#define CHUNKCOPY_CHUNK_SIZE sizeof(z_vec128i_t) -+ -+Z_STATIC_ASSERT(vector_128_bits_wide, -+ CHUNKCOPY_CHUNK_SIZE == sizeof(int8_t) * 16); -+ -+/* -+ * Ask the compiler to perform a wide, unaligned load with a machinevst1q_u8 -+ * instruction appropriate for the z_vec128i_t type. -+ */ -+static inline z_vec128i_t loadchunk( -+ const unsigned char FAR* s) -+{ -+ z_vec128i_t v; -+ Z_BUILTIN_MEMCPY(&v, s, sizeof(v)); -+ return v; -+} -+ -+/* -+ * Ask the compiler to perform a wide, unaligned store with a machine -+ * instruction appropriate for the z_vec128i_t type. -+ */ -+static inline void storechunk( -+ unsigned char FAR* d, -+ const z_vec128i_t v) -+{ -+ Z_BUILTIN_MEMCPY(d, &v, sizeof(v)); -+} -+ -+/* -+ * Perform a memcpy-like operation, assuming that length is non-zero and that -+ * it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if -+ * the length is shorter than this. 
-+ * -+ * It also guarantees that it will properly unroll the data if the distance -+ * between `out` and `from` is at least CHUNKCOPY_CHUNK_SIZE, which we rely on -+ * in chunkcopy_relaxed(). -+ * -+ * Aside from better memory bus utilisation, this means that short copies -+ * (CHUNKCOPY_CHUNK_SIZE bytes or fewer) will fall straight through the loop -+ * without iteration, which will hopefully make the branch prediction more -+ * reliable. -+ */ -+static inline unsigned char FAR* chunkcopy_core( -+ unsigned char FAR* out, -+ const unsigned char FAR* from, -+ unsigned len) -+{ -+ const int bump = (--len % CHUNKCOPY_CHUNK_SIZE) + 1; -+ storechunk(out, loadchunk(from)); -+ out += bump; -+ from += bump; -+ len /= CHUNKCOPY_CHUNK_SIZE; -+ while (len-- > 0) { -+ storechunk(out, loadchunk(from)); -+ out += CHUNKCOPY_CHUNK_SIZE; -+ from += CHUNKCOPY_CHUNK_SIZE; -+ } -+ return out; -+} -+ -+/* -+ * Like chunkcopy_core(), but avoid writing beyond of legal output. -+ * -+ * Accepts an additional pointer to the end of safe output. A generic safe -+ * copy would use (out + len), but it's normally the case that the end of the -+ * output buffer is beyond the end of the current copy, and this can still be -+ * exploited. -+ */ -+static inline unsigned char FAR* chunkcopy_core_safe( -+ unsigned char FAR* out, -+ const unsigned char FAR* from, -+ unsigned len, -+ unsigned char FAR* limit) -+{ -+ Assert(out + len <= limit, "chunk copy exceeds safety limit"); -+ if ((limit - out) < (ptrdiff_t) CHUNKCOPY_CHUNK_SIZE) { -+ const unsigned char FAR* Z_RESTRICT rfrom = from; -+ if (len & 8) { -+ Z_BUILTIN_MEMCPY(out, rfrom, 8); -+ out += 8; -+ rfrom += 8; -+ } -+ if (len & 4) { -+ Z_BUILTIN_MEMCPY(out, rfrom, 4); -+ out += 4; -+ rfrom += 4; -+ } -+ if (len & 2) { -+ Z_BUILTIN_MEMCPY(out, rfrom, 2); -+ out += 2; -+ rfrom += 2; -+ } -+ if (len & 1) { -+ *out++ = *rfrom++; -+ } -+ return out; -+ } -+ return chunkcopy_core(out, from, len); -+} -+ -+/* -+ * Perform short copies until distance can be rewritten as being at least -+ * CHUNKCOPY_CHUNK_SIZE. -+ * -+ * Assumes it's OK to overwrite at least the first 2*CHUNKCOPY_CHUNK_SIZE -+ * bytes of output even if the copy is shorter than this. This assumption -+ * holds within zlib inflate_fast(), which starts every iteration with at -+ * least 258 bytes of output space available (258 being the maximum length -+ * output from a single token; see inffast.c). -+ */ -+static inline unsigned char FAR* chunkunroll_relaxed( -+ unsigned char FAR* out, -+ unsigned FAR* dist, -+ unsigned FAR* len) -+{ -+ const unsigned char FAR* from = out - *dist; -+ while (*dist < *len && *dist < CHUNKCOPY_CHUNK_SIZE) { -+ storechunk(out, loadchunk(from)); -+ out += *dist; -+ *len -= *dist; -+ *dist += *dist; -+ } -+ return out; -+} -+ -+/* -+ * v_load64_dup(): load *src as an unaligned 64-bit int and duplicate it in -+ * every 64-bit component of the 128-bit result (64-bit int splat). -+ */ -+static inline z_vec128i_t v_load64_dup(const void* src) -+{ -+ return vcombine_u8(vld1_u8(src), vld1_u8(src)); -+} -+ -+/* -+ * v_load32_dup(): load *src as an unaligned 32-bit int and duplicate it in -+ * every 32-bit component of the 128-bit result (32-bit int splat). -+ */ -+static inline z_vec128i_t v_load32_dup(const void* src) -+{ -+ int32_t i32; -+ Z_BUILTIN_MEMCPY(&i32, src, sizeof(i32)); -+ return vreinterpretq_u8_s32(vdupq_n_s32(i32)); -+} -+ -+/* -+ * v_load16_dup(): load *src as an unaligned 16-bit int and duplicate it in -+ * every 16-bit component of the 128-bit result (16-bit int splat). 
-+ */ -+static inline z_vec128i_t v_load16_dup(const void* src) -+{ -+ int16_t i16; -+ Z_BUILTIN_MEMCPY(&i16, src, sizeof(i16)); -+ return vreinterpretq_u8_s16(vdupq_n_s16(i16)); -+} -+ -+/* -+ * v_load8_dup(): load the 8-bit int *src and duplicate it in every 8-bit -+ * component of the 128-bit result (8-bit int splat). -+ */ -+static inline z_vec128i_t v_load8_dup(const void* src) -+{ -+ return vld1q_dup_u8((const uint8_t*) src); -+} -+ -+/* -+ * v_store_128(): store the 128-bit vec in a memory destination (that might -+ * not be 16-byte aligned) void* out. -+ */ -+static inline void v_store_128(unsigned char* out, const z_vec128i_t vec) -+{ -+ vst1q_u8(out, vec); -+} -+ -+/* -+ * Perform an overlapping copy which behaves as a memset() operation, but -+ * supporting periods other than one, and assume that length is non-zero and -+ * that it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE*3 bytes of output -+ * even if the length is shorter than this. -+ */ -+static inline unsigned char FAR* chunkset_store_result( -+ unsigned len, -+ unsigned char FAR* out, -+ z_vec128i_t v) -+{ -+ do { -+ v_store_128(out, v); -+ out += sizeof(v); -+ len -= sizeof(v); -+ } while (len > 0); -+ return out; -+} -+ -+static inline unsigned char FAR* chunkset_core(unsigned char FAR* out, unsigned period, unsigned len) -+{ -+ z_vec128i_t v; -+ const int bump = ((len - 1) % sizeof(v)) + 1; -+ switch (period) { -+ case 1: -+ v = v_load8_dup(out - 1); -+ v_store_128(out, v); -+ out += bump; -+ len -= bump; -+ while (len > 0) { -+ v_store_128(out, v); -+ out += sizeof(v); -+ len -= sizeof(v); -+ } -+ return out; -+ case 2: -+ v = v_load16_dup(out - 2); -+ v_store_128(out, v); -+ out += bump; -+ len -= bump; -+ if (len > 0) { -+ v = v_load16_dup(out - 2); -+ out = chunkset_store_result(len, out, v); -+ } -+ return out; -+ case 4: -+ v = v_load32_dup(out - 4); -+ v_store_128(out, v); -+ out += bump; -+ len -= bump; -+ if (len > 0) { -+ v = v_load32_dup(out - 4); -+ out = chunkset_store_result(len, out, v); -+ } -+ return out; -+ case 8: -+ v = v_load64_dup(out - 8); -+ v_store_128(out, v); -+ out += bump; -+ len -= bump; -+ if (len > 0) { -+ v = v_load64_dup(out - 8); -+ out = chunkset_store_result(len, out, v); -+ } -+ return out; -+ } -+ out = chunkunroll_relaxed(out, &period, &len); -+ return chunkcopy_core(out, out - period, len); -+} -+ -+/* -+ * Perform a memcpy-like operation, but assume that length is non-zero and that -+ * it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if -+ * the length is shorter than this. -+ * -+ * Unlike chunkcopy_core() above, no guarantee is made regarding the behaviour -+ * of overlapping buffers, regardless of the distance between the pointers. -+ * This is reflected in the `restrict`-qualified pointers, allowing the -+ * compiler to re-order loads and stores. -+ */ -+static inline unsigned char FAR* chunkcopy_relaxed( -+ unsigned char FAR* Z_RESTRICT out, -+ const unsigned char FAR* Z_RESTRICT from, -+ unsigned len) -+{ -+ return chunkcopy_core(out, from, len); -+} -+ -+/* -+ * Like chunkcopy_relaxed(), but avoid writing beyond of legal output. -+ * -+ * Unlike chunkcopy_core_safe() above, no guarantee is made regarding the -+ * behaviour of overlapping buffers, regardless of the distance between the -+ * pointers. This is reflected in the `restrict`-qualified pointers, allowing -+ * the compiler to re-order loads and stores. -+ * -+ * Accepts an additional pointer to the end of safe output. 
A generic safe -+ * copy would use (out + len), but it's normally the case that the end of the -+ * output buffer is beyond the end of the current copy, and this can still be -+ * exploited. -+ */ -+static inline unsigned char FAR* chunkcopy_safe( -+ unsigned char FAR* out, -+ const unsigned char FAR* Z_RESTRICT from, -+ unsigned len, -+ unsigned char FAR* limit) -+{ -+ Assert(out + len <= limit, "chunk copy exceeds safety limit"); -+ return chunkcopy_core_safe(out, from, len, limit); -+} -+ -+/* -+ * Perform chunky copy within the same buffer, where the source and destination -+ * may potentially overlap. -+ * -+ * Assumes that len > 0 on entry, and that it's safe to write at least -+ * CHUNKCOPY_CHUNK_SIZE*3 bytes to the output. -+ */ -+static inline unsigned char FAR* chunkcopy_lapped_relaxed( -+ unsigned char FAR* out, -+ unsigned dist, -+ unsigned len) -+{ -+ if (dist < len && dist < CHUNKCOPY_CHUNK_SIZE) { -+ return chunkset_core(out, dist, len); -+ } -+ return chunkcopy_core(out, out - dist, len); -+} -+ -+/* -+ * Behave like chunkcopy_lapped_relaxed(), but avoid writing beyond of legal -+ * output. -+ * -+ * Accepts an additional pointer to the end of safe output. A generic safe -+ * copy would use (out + len), but it's normally the case that the end of the -+ * output buffer is beyond the end of the current copy, and this can still be -+ * exploited. -+ */ -+static inline unsigned char FAR* chunkcopy_lapped_safe( -+ unsigned char FAR* out, -+ unsigned dist, -+ unsigned len, -+ unsigned char FAR* limit) -+{ -+ Assert(out + len <= limit, "chunk copy exceeds safety limit"); -+ if ((limit - out) < (ptrdiff_t) (3 * CHUNKCOPY_CHUNK_SIZE)) { -+ while (len-- > 0) { -+ *out = *(out - dist); -+ out++; -+ } -+ return out; -+ } -+ return chunkcopy_lapped_relaxed(out, dist, len); -+} -+ -+ -+#undef Z_STATIC_ASSERT -+#undef Z_RESTRICT -+#undef Z_BUILTIN_MEMCPY -+ -+#endif //defined(INFLATE_CHUNK_SIMD_NEON) -diff --git a/inflate.c b/inflate.c -index ca904e7..c78e05b 100644 ---- a/inflate.c -+++ b/inflate.c -@@ -429,9 +429,16 @@ unsigned copy; - - /* if it hasn't been done already, allocate space for the window */ - if (state->window == Z_NULL) { -+#if defined(INFLATE_CHUNK_SIMD_NEON) -+ unsigned wsize = 1U << state->wbits; -+ state->window = (unsigned char FAR *) -+ ZALLOC(strm, CHUNKCOPY_CHUNK_SIZE + wsize, -+ sizeof(unsigned char)); -+#else - state->window = (unsigned char FAR *) - ZALLOC(strm, 1U << state->wbits, - sizeof(unsigned char)); -+#endif - if (state->window == Z_NULL) return 1; - } - -diff --git a/adler32.c b/adler32.c -index e148022..e024a15 100644 ---- a/adler32.c -+++ b/adler32.c -@@ -83,7 +83,169 @@ local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); - # define MOD63(a) a %= BASE - #endif - --/* ========================================================================= */ -+#if defined(ADLER32_SIMD_NEON) -+#include -+/* -+ * Multiply-add bytes by [ 32, 31, 30, ... ] for s2. 
-+ */ -+uint32x4_t ZLIB_INTERNAL mul_add_bytes( -+ uint32x4_t v_s2, -+ uint16x8_t v_column_sum_1, -+ uint16x8_t v_column_sum_2, -+ uint16x8_t v_column_sum_3, -+ uint16x8_t v_column_sum_4) -+{ -+ v_s2 = vshlq_n_u32(v_s2, 5); -+ -+ v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_1), -+ (uint16x4_t) { 32, 31, 30, 29 }); -+ v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), -+ (uint16x4_t) { 28, 27, 26, 25 }); -+ v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_2), -+ (uint16x4_t) { 24, 23, 22, 21 }); -+ v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), -+ (uint16x4_t) { 20, 19, 18, 17 }); -+ v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_3), -+ (uint16x4_t) { 16, 15, 14, 13 }); -+ v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), -+ (uint16x4_t) { 12, 11, 10, 9 }); -+ v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_4), -+ (uint16x4_t) { 8, 7, 6, 5 }); -+ v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), -+ (uint16x4_t) { 4, 3, 2, 1 }); -+ return v_s2; -+} -+ -+/* -+ * Handle leftover data. -+ */ -+uLong ZLIB_INTERNAL leftover_handler(uint32_t s1, uint32_t s2, const Bytef *buf, z_size_t len) -+{ -+ if (len) { -+ if (len >= 16) { -+ s2 += (s1 += *buf++); -+ s2 += (s1 += *buf++); -+ s2 += (s1 += *buf++); -+ s2 += (s1 += *buf++); -+ -+ s2 += (s1 += *buf++); -+ s2 += (s1 += *buf++); -+ s2 += (s1 += *buf++); -+ s2 += (s1 += *buf++); -+ -+ s2 += (s1 += *buf++); -+ s2 += (s1 += *buf++); -+ s2 += (s1 += *buf++); -+ s2 += (s1 += *buf++); -+ -+ s2 += (s1 += *buf++); -+ s2 += (s1 += *buf++); -+ s2 += (s1 += *buf++); -+ s2 += (s1 += *buf++); -+ -+ len -= 16; -+ } -+ -+ while (len--) { -+ s2 += (s1 += *buf++); -+ } -+ -+ if (s1 >= BASE) -+ s1 -= BASE; -+ s2 %= BASE; -+ } -+ -+ /* -+ * Return the recombined sums. -+ */ -+ return s1 | (s2 << 16); -+} -+ -+uLong ZLIB_INTERNAL adler32_simd_(uLong adler, const Bytef *buf, z_size_t len) -+{ -+ /* -+ * Split Adler-32 into component sums. -+ */ -+ uint32_t s1 = adler & 0xffff; -+ uint32_t s2 = adler >> 16; -+ /* -+ * Serially compute s1 & s2, until the data is 16-byte aligned. -+ */ -+ if ((uintptr_t)buf & 0xf) { -+ while ((uintptr_t)buf & 0xf) { -+ s2 += (s1 += *buf++); -+ --len; -+ } -+ if (s1 >= BASE) -+ s1 -= BASE; -+ s2 %= BASE; -+ } -+ /* -+ * Process the data in blocks. -+ */ -+ const unsigned BLOCK_SIZE = 1 << 5; -+ z_size_t blocks = len / BLOCK_SIZE; -+ len -= blocks * BLOCK_SIZE; -+ while (blocks) { -+ unsigned n = NMAX / BLOCK_SIZE; /* The NMAX constraint. */ -+ if (n > blocks) -+ n = (unsigned) blocks; -+ blocks -= n; -+ /* -+ * Process n blocks of data. At most NMAX data bytes can be -+ * processed before s2 must be reduced modulo BASE. -+ */ -+ uint32x4_t v_s2 = (uint32x4_t) { 0, 0, 0, s1 * n }; -+ uint32x4_t v_s1 = (uint32x4_t) { 0, 0, 0, 0 }; -+ -+ uint16x8_t v_column_sum_1 = vdupq_n_u16(0); -+ uint16x8_t v_column_sum_2 = vdupq_n_u16(0); -+ uint16x8_t v_column_sum_3 = vdupq_n_u16(0); -+ uint16x8_t v_column_sum_4 = vdupq_n_u16(0); -+ do { -+ /* -+ * Load 32 input bytes. -+ */ -+ const uint8x16_t bytes1 = vld1q_u8((uint8_t*)(buf)); -+ const uint8x16_t bytes2 = vld1q_u8((uint8_t*)(buf + 16)); -+ /* -+ * Add previous block byte sum to v_s2. -+ */ -+ v_s2 = vaddq_u32(v_s2, v_s1); -+ /* -+ * Horizontally add the bytes for s1. -+ */ -+ v_s1 = vpadalq_u16(v_s1, vpadalq_u8(vpaddlq_u8(bytes1), bytes2)); -+ /* -+ * Vertically add the bytes for s2. 
-+ */ -+ v_column_sum_1 = vaddw_u8(v_column_sum_1, vget_low_u8 (bytes1)); -+ v_column_sum_2 = vaddw_u8(v_column_sum_2, vget_high_u8(bytes1)); -+ v_column_sum_3 = vaddw_u8(v_column_sum_3, vget_low_u8 (bytes2)); -+ v_column_sum_4 = vaddw_u8(v_column_sum_4, vget_high_u8(bytes2)); -+ buf += BLOCK_SIZE; -+ } while (--n); -+ v_s2 = mul_add_bytes(v_s2, v_column_sum_1, v_column_sum_2, v_column_sum_3, v_column_sum_4); -+ /* -+ * Sum epi32 ints v_s1(s2) and accumulate in s1(s2). -+ */ -+ uint32x2_t sum1 = vpadd_u32(vget_low_u32(v_s1), vget_high_u32(v_s1)); -+ uint32x2_t sum2 = vpadd_u32(vget_low_u32(v_s2), vget_high_u32(v_s2)); -+ uint32x2_t s1s2 = vpadd_u32(sum1, sum2); -+ -+ s1 += vget_lane_u32(s1s2, 0); -+ s2 += vget_lane_u32(s1s2, 1); -+ /* -+ * Reduce. -+ */ -+ s1 %= BASE; -+ s2 %= BASE; -+ } -+ return leftover_handler(s1, s2, buf, len); -+ -+} -+#endif -+ - uLong ZEXPORT adler32_z(adler, buf, len) - uLong adler; - const Bytef *buf; -@@ -92,6 +254,11 @@ uLong ZEXPORT adler32_z(adler, buf, len) - unsigned long sum2; - unsigned n; - -+#if defined(ADLER32_SIMD_NEON) -+ if (buf && len >= 64) -+ return adler32_simd_(adler, buf, len); -+#endif -+ - /* split Adler-32 into component sums */ - sum2 = (adler >> 16) & 0xffff; - adler &= 0xffff; ---- zlib-1.2.11/CMakeLists.txt 2020-08-04 14:35:44.023579477 +0800 -+++ CMakeLists.txt 2020-08-04 14:39:38.937798725 +0800 -@@ -145,6 +145,7 @@ if(CMAKE_COMPILER_IS_GNUCC) - contrib/arm/arm_longest_match.h) - set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c) - add_definitions(-DARM_NEON) -+ add_definitions(-DHASH_ARMV8_CRC32 -march=armv8-a+crc -DUNALIGNED_OK -DADLER32_SIMD_NEON -DINFLATE_CHUNK_SIMD_NEON -O3) - set(COMPILER ${CMAKE_C_COMPILER}) - # NEON is mandatory in ARMv8. - if(${COMPILER} MATCHES "aarch64") diff --git a/zlib-1.2.11.tar.xz b/zlib-1.2.11.tar.xz deleted file mode 100644 index 305b7a0..0000000 Binary files a/zlib-1.2.11.tar.xz and /dev/null differ diff --git a/0004-zlib-Optimize-CRC32.patch b/zlib-Optimize-CRC32.patch similarity index 71% rename from 0004-zlib-Optimize-CRC32.patch rename to zlib-Optimize-CRC32.patch index 1068625..a11ee46 100644 --- a/0004-zlib-Optimize-CRC32.patch +++ b/zlib-Optimize-CRC32.patch @@ -6,29 +6,29 @@ Subject: [PATCH] zlib: Optimize CRC32 This patch uses the NEON instruction set to optimize the CRC32 algorithm. -On the ARM architecture, we can optimize the efficiency of +On the ARM architecture, we can optimize the efficiency of crc32 through the interface provided by the neon instruction set. Modify by Li Qiang. --- - crc32.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 47 insertions(+) + crc32.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 48 insertions(+) diff --git a/crc32.c b/crc32.c -index 9580440..79ebdbd 100644 +index f8357b0..5c53068 100644 --- a/crc32.c +++ b/crc32.c -@@ -29,6 +29,9 @@ +@@ -28,6 +28,9 @@ #endif /* MAKECRCH */ - #include "zutil.h" /* for STDC and FAR definitions */ + #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ +#ifdef __aarch64__ +#include "arm_acle.h" +#endif - /* Definitions for doing the crc four data bytes at a time. 
*/
- #if !defined(NOBYFOUR) && defined(Z_U4)
-@@ -194,6 +197,47 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
+ /*
+   A CRC of a message is computed on N braids of words in the message, where
+@@ -600,6 +603,47 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
  return (const z_crc_t FAR *)crc_table;
  }
 
@@ -73,19 +73,20 @@
  return (crc_result ^ 0xffffffffL);
 }
 
- /* ========================================================================= */
- #define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
- #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
-@@ -204,6 +248,9 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
+ /* =========================================================================
+ * Use ARM machine instructions if available. This will compute the CRC about
+ * ten times faster than the braided calculation. This code does not check for
+@@ -750,6 +794,10 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
 const unsigned char FAR *buf;
 z_size_t len;
 {
+ #ifdef __aarch64__
+ return crc32_neon(crc, buf, len);
+ #endif
- if (buf == Z_NULL) return 0UL;
+
+ /* Return initial CRC, if requested. */
+ if (buf == Z_NULL) return 0;
- #ifdef DYNAMIC_CRC_TABLE
 --
-1.8.3.1
+2.27.0
diff --git a/zlib.spec b/zlib.spec
index 9bc0cac..3fe8135 100644
--- a/zlib.spec
+++ b/zlib.spec
@@ -1,26 +1,17 @@
 Name: zlib
-Version: 1.2.11
-Release: 24
+Version: 1.2.13
+Release: 1
 Summary: A lossless data-compression library
 License: zlib and Boost
 URL: http://www.zlib.net
 Source0: http://www.zlib.net/zlib-%{version}.tar.xz
 # Patch0 get from fedora
-Patch0: zlib-1.2.5-minizip-fixuncrypt.patch
-# Patch1 to Patch3 get from http://www.gildor.org/en/projects/zlib
-Patch1: 0001-Neon-Optimized-hash-chain-rebase.patch
-Patch2: 0002-Porting-optimized-longest_match.patch
-Patch3: 0003-arm64-specific-build-patch.patch
-Patch4: 0004-zlib-Optimize-CRC32.patch
-Patch5: zlib-1.2.11-SIMD.patch
-Patch6: 0005-Accelerate-Adler32-using-arm64-SVE-instructions.patch
+Patch6000: backport-zlib-1.2.5-minizip-fixuncrypt.patch
+Patch6001: backport-fix-undefined-buffer-detected-by-oss-fuzz.patch
 
-Patch6000: fix-undefined-buffer-detected-by-oss-fuzz.patch
-Patch6001: backport-0001-CVE-2018-25032.patch
-Patch6002: backport-0002-CVE-2018-25032.patch
-Patch6003: backport-0001-CVE-2022-37434.patch
-Patch6004: backport-0002-CVE-2022-37434.patch
+Patch9000: zlib-Optimize-CRC32.patch
+Patch9001: Accelerate-Adler32-using-arm64-SVE-instructions.patch
 
 BuildRequires: automake, autoconf, libtool
 
@@ -64,20 +55,12 @@ This package contains the development-related content related to minizip.
 
 %prep
 %setup -n %{name}-%{version}
-%patch0 -p1
-%ifarch aarch64
-%patch1 -p1
-%patch2 -p1
-%patch3 -p1
-%patch4 -p1
-%patch5 -p1
-%patch6 -p1
-%endif
 %patch6000 -p1
 %patch6001 -p1
-%patch6002 -p1
-%patch6003 -p1
-%patch6004 -p1
+%ifarch aarch64
+%patch9000 -p1
+%patch9001 -p1
+%endif
 
 %build
 export CFLAGS="$RPM_OPT_FLAGS"
@@ -135,6 +118,9 @@ make test
 %{_libdir}/pkgconfig/minizip.pc
 
 %changelog
+* Thu Dec 29 2022 zhoupengcheng - 1.2.13-1
+- update to zlib-1.2.13
+
 * Mon Dec 26 2022 zhoupengcheng - 1.2.11-24
 - DESC:remove unapplied patches