!31 update to zlib-1.2.13

From: @zhoupengcheng11 Reviewed-by: @lvying6 Signed-off-by: @lvying6
2023-01-05 01:33:23 +00:00 · 2023-01-05 01:33:23 +00:00 · 19f7d91654
commit 19f7d91654
parent 3839aca4cd 3391c5df3e
15 changed files with 272 additions and 1378 deletions
--- a/0001-Neon-Optimized-hash-chain-rebase.patch
+++ b/0001-Neon-Optimized-hash-chain-rebase.patch
@ -1,170 +0,0 @@
 From f0fd8c553fa024c599f4aff65d7c603ceeaa6a58 Mon Sep 17 00:00:00 2001
 From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
 Date: Mon, 9 Apr 2018 13:52:17 -0700
 Subject: [PATCH 1/3] Neon-Optimized hash chain rebase
 This should help with compression of data, using NEON instructions
 (therefore useful for ARMv7/ARMv8).
 Original patch by Jun He.
 ---
 CMakeLists.txt                | 18 ++++++++
 contrib/arm/neon_slide_hash.h | 84 +++++++++++++++++++++++++++++++++++
 deflate.c                     |  7 +++
 3 files changed, 109 insertions(+)
 create mode 100644 contrib/arm/neon_slide_hash.h
 diff --git a/CMakeLists.txt b/CMakeLists.txt
 index 0fe939d..e9a74e9 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
@@ -136,6 +136,24 @@ if(CMAKE_COMPILER_IS_GNUCC)
         set(ZLIB_ASMS contrib/amd64/amd64-match.S)
     endif ()
 +    if(ARM_NEON)
 +        list(REMOVE_ITEM ZLIB_SRCS inflate.c)
 +        set(ZLIB_ARM_NEON_HDRS
 +            contrib/arm/chunkcopy.h
 +            contrib/arm/inffast_chunk.h
 +            contrib/arm/neon_slide_hash.h)
 +        set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
 +        add_definitions(-DARM_NEON)
 +        set(COMPILER ${CMAKE_C_COMPILER})
 +        # NEON is mandatory in ARMv8.
 +        if(${COMPILER} MATCHES "aarch64")
 +          set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -march=armv8-a)
 +          # But it was optional for ARMv7.
 +        elseif(${COMPILER} MATCHES "arm")
 +          set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -mfpu=neon)
 +        endif()
 +    endif()
 +
 	if(ZLIB_ASMS)
 		add_definitions(-DASMV)
 		set_source_files_properties(${ZLIB_ASMS} PROPERTIES LANGUAGE C COMPILE_FLAGS -DNO_UNDERLINE)
 diff --git a/contrib/arm/neon_slide_hash.h b/contrib/arm/neon_slide_hash.h
 new file mode 100644
 index 0000000..0daffa1
 --- /dev/null
 +++ b/contrib/arm/neon_slide_hash.h
@@ -0,0 +1,84 @@
 +/* Copyright (C) 1995-2011, 2016 Mark Adler
 + * Copyright (C) 2017 ARM Holdings Inc.
 + * Authors: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
 + *          Jun He <jun.he@arm.com>
 + * This software is provided 'as-is', without any express or implied
 + * warranty.  In no event will the authors be held liable for any damages
 + * arising from the use of this software.
 + * Permission is granted to anyone to use this software for any purpose,
 + * including commercial applications, and to alter it and redistribute it
 + * freely, subject to the following restrictions:
 + * 1. The origin of this software must not be misrepresented; you must not
 + *  claim that you wrote the original software. If you use this software
 + *    in a product, an acknowledgment in the product documentation would be
 + *    appreciated but is not required.
 + * 2. Altered source versions must be plainly marked as such, and must not be
 + *    misrepresented as being the original software.
 + * 3. This notice may not be removed or altered from any source distribution.
 + */
 +#ifndef __NEON_SLIDE_HASH__
 +#define __NEON_SLIDE_HASH__
 +
 +#if (defined(__ARM_NEON__) || defined(__ARM_NEON))
 +#include "deflate.h"
 +#include <arm_neon.h>
 +
 +inline static void neon_slide_hash(deflate_state *s)
 +{
 +    /*
 +     * This is ASIMD implementation for hash table rebase
 +     * it assumes:
 +     * 1. hash chain offset (Pos) is 2 bytes
 +     * 2. hash table size is multiple*128 bytes
 +     * #1 should be true as Pos is defined as "ush"
 +     * #2 should be true as hash_bits are greater that 7
 +     */
 +    unsigned n, m;
 +    unsigned short wsize = s->w_size;
 +    uint16x8_t v, *p;
 +    size_t size;
 +
 +    size = s->hash_size*sizeof(s->head[0]);
 +    Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
 +
 +    Assert(sizeof(Pos) == 2, "Wrong Pos size");
 +
 +    /* slide s->head */
 +    v = vdupq_n_u16(wsize);
 +    p = (uint16x8_t *)(s->head);
 +    n = size / (sizeof(uint16x8_t) * 8);
 +    do {
 +        p[0] = vqsubq_u16(p[0], v);
 +        p[1] = vqsubq_u16(p[1], v);
 +        p[2] = vqsubq_u16(p[2], v);
 +        p[3] = vqsubq_u16(p[3], v);
 +        p[4] = vqsubq_u16(p[4], v);
 +        p[5] = vqsubq_u16(p[5], v);
 +        p[6] = vqsubq_u16(p[6], v);
 +        p[7] = vqsubq_u16(p[7], v);
 +        p += 8;
 +    } while (--n);
 +#ifndef FASTEST
 +    /* slide s->prev */
 +    size = wsize*sizeof(s->prev[0]);
 +
 +    Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
 +
 +    p = (uint16x8_t *)(s->prev);
 +    n = size / (sizeof(uint16x8_t) * 8);
 +    do {
 +        p[0] = vqsubq_u16(p[0], v);
 +        p[1] = vqsubq_u16(p[1], v);
 +        p[2] = vqsubq_u16(p[2], v);
 +        p[3] = vqsubq_u16(p[3], v);
 +        p[4] = vqsubq_u16(p[4], v);
 +        p[5] = vqsubq_u16(p[5], v);
 +        p[6] = vqsubq_u16(p[6], v);
 +        p[7] = vqsubq_u16(p[7], v);
 +        p += 8;
 +    } while (--n);
 +#endif
 +}
 +
 +#endif
 +#endif
 diff --git a/deflate.c b/deflate.c
 index 1ec7614..36f99ac 100644
 --- a/deflate.c
 +++ b/deflate.c
@@ -50,6 +50,9 @@
 /* @(#) $Id$ */
 #include "deflate.h"
 +#if __ARM_NEON
 +#include "contrib/arm/neon_slide_hash.h"
 +#endif
 const char deflate_copyright[] =
    " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
@@ -201,6 +204,9 @@ local const config configuration_table[10] = {
 local void slide_hash(s)
     deflate_state *s;
 {
 +#if ARM_NEON
 +    return neon_slide_hash(s);
 +#else
     unsigned n, m;
     Posf *p;
     uInt wsize = s->w_size;
@@ -222,6 +228,7 @@ local void slide_hash(s)
          */
     } while (--n);
 #endif
 +#endif
 }
 /* ========================================================================= */
 -- 
 2.19.0
--- a/0002-Porting-optimized-longest_match.patch
+++ b/0002-Porting-optimized-longest_match.patch
@ -1,218 +0,0 @@
 From 17a154db6774a4acf347cfc5189eaf2cd675e696 Mon Sep 17 00:00:00 2001
 From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
 Date: Mon, 9 Apr 2018 15:14:19 -0700
 Subject: [PATCH 2/3] Porting optimized longest_match
 This patch was contributed to zlib-ng and features an improved longest_match
 function using the most distant hash code to reduce number of checks
 (see: http://www.gildor.org/en/projects/zlib).
 Original patch by Jun He.
 ---
 CMakeLists.txt                  |   3 +-
 contrib/arm/arm_longest_match.h | 142 ++++++++++++++++++++++++++++++++
 deflate.c                       |  11 ++-
 3 files changed, 152 insertions(+), 4 deletions(-)
 create mode 100644 contrib/arm/arm_longest_match.h
 diff --git a/CMakeLists.txt b/CMakeLists.txt
 index e9a74e9..3826eba 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
@@ -141,7 +141,8 @@ if(CMAKE_COMPILER_IS_GNUCC)
         set(ZLIB_ARM_NEON_HDRS
             contrib/arm/chunkcopy.h
             contrib/arm/inffast_chunk.h
 -            contrib/arm/neon_slide_hash.h)
 +            contrib/arm/neon_slide_hash.h
 +            contrib/arm/arm_longest_match.h)
         set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
         add_definitions(-DARM_NEON)
         set(COMPILER ${CMAKE_C_COMPILER})
 diff --git a/contrib/arm/arm_longest_match.h b/contrib/arm/arm_longest_match.h
 new file mode 100644
 index 0000000..9e7083f
 --- /dev/null
 +++ b/contrib/arm/arm_longest_match.h
@@ -0,0 +1,142 @@
 +/* Copyright (C) 1995-2011, 2016 Mark Adler
 + * Copyright (C) 2017 ARM Holdings Inc.
 + * Authors: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
 + *          Jun He <jun.he@arm.com>
 + * This software is provided 'as-is', without any express or implied
 + * warranty.  In no event will the authors be held liable for any damages
 + * arising from the use of this software.
 + * Permission is granted to anyone to use this software for any purpose,
 + * including commercial applications, and to alter it and redistribute it
 + * freely, subject to the following restrictions:
 + * 1. The origin of this software must not be misrepresented; you must not
 + *  claim that you wrote the original software. If you use this software
 + *    in a product, an acknowledgment in the product documentation would be
 + *    appreciated but is not required.
 + * 2. Altered source versions must be plainly marked as such, and must not be
 + *    misrepresented as being the original software.
 + * 3. This notice may not be removed or altered from any source distribution.
 + */
 +#ifndef __ARM_LONGEST__MATCH__
 +#define __ARM_LONGEST__MATCH__
 +
 +#if defined(ARM_NEON)
 +#include "deflate.h"
 +#include <stdint.h>
 +static inline long get_match_len(const unsigned char *a, const unsigned char *b, long max)
 +{
 +    register int len = 0;
 +    register unsigned long xor = 0;
 +    register int check_loops = max/sizeof(unsigned long);
 +    while(check_loops-- > 0) {
 +        xor = (*(unsigned long *)(a+len)) ^ (*(unsigned long *)(b+len));
 +        if (xor) break;
 +        len += sizeof(unsigned long);
 +    }
 +    if (0 == xor) {
 +        while (len < max) {
 +            if (a[len] != b[len]) break;
 +            len++;
 +        }
 +        return len;
 +    }
 +    xor = __builtin_ctzl(xor)>>3;
 +    return len + xor;
 +}
 +
 +/*
 + * This implementation is based on algorithm described at:
 + * http://www.gildor.org/en/projects/zlib
 + * It uses the hash chain indexed by the most distant hash code to
 + * reduce number of checks.
 + * This also eliminates the those unnecessary check loops in legacy
 + * longest_match's do..while loop if the "most distant code" is out
 + * of search buffer
 + *
 + */
 +static inline unsigned arm_longest_match(deflate_state *const s, IPos cur_match) {
 +    unsigned chain_length = s->max_chain_length;/* max hash chain length */
 +    unsigned char *scan = s->window + s->strstart; /* current string */
 +    unsigned char *match;                       /* matched string */
 +    unsigned int len;                  /* length of current match */
 +    unsigned int best_len = s->prev_length;     /* best match length so far */
 +    unsigned int nice_match = s->nice_match;    /* stop if match long enough */
 +    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
 +        s->strstart - (IPos)MAX_DIST(s) : 0;
 +    /* Stop when cur_match becomes <= limit. To simplify the code,
 +     * we prevent matches with the string of window index 0.
 +     */
 +    int offset = 0;  /* offset of the head[most_distant_hash] from IN cur_match */
 +    Pos *prev = s->prev;
 +    unsigned int wmask = s->w_mask;
 +    unsigned char *scan_buf_base = s->window;
 +
 +    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
 +     * It is easy to get rid of this optimization if necessary.
 +     */
 +    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
 +
 +    /* Do not look for matches beyond the end of the input. This is necessary
 +     * to make deflate deterministic.
 +     */
 +    if ((unsigned int)nice_match > s->lookahead) nice_match = s->lookahead;
 +
 +    Assert((unsigned long)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
 +
 +    /* find most distant hash code for lazy_match */
 +    if (best_len > MIN_MATCH) {
 +        /* search for most distant hash code */
 +        int i;
 +        uint16_t hash = 0;
 +        IPos pos;
 +
 +        UPDATE_HASH(s, hash, scan[1]);
 +        UPDATE_HASH(s, hash, scan[2]);
 +        for (i = 3; i <= best_len; i++) {
 +            UPDATE_HASH(s, hash, scan[i]);
 +            /* get head IPos of hash calced by scan[i-2..i] */
 +            pos = s->head[hash];
 +            /* compare it to current "farthest hash" IPos */
 +            if (pos <= cur_match) {
 +                /* we have a new "farthest hash" now */
 +                offset = i - 2;
 +                cur_match = pos;
 +            }
 +        }
 +
 +        /* update variables to correspond offset */
 +        limit += offset;
 +        /*
 +         * check if the most distant code's offset is out of search buffer
 +         * if it is true, then this means scan[offset..offset+2] are not
 +	 * presented in the search buffer. So we just return best_len 
 +	 * we've found.
 +         */
 +        if (cur_match < limit) return best_len;
 +
 +        scan_buf_base -= offset;
 +        /* reduce hash search depth based on best_len */
 +        chain_length /= best_len - MIN_MATCH;
 +    }
 +
 +    do {
 +        Assert(cur_match < s->strstart, "no future");
 +
 +        /* Determine matched length at current pos */
 +        match = scan_buf_base + cur_match;
 +        len = get_match_len(match, scan, MAX_MATCH);
 +
 +        if (len > best_len) {
 +            /* found longer string */
 +            s->match_start = cur_match - offset;
 +            best_len = len;
 +            /* good enough? */
 +            if (len >= nice_match) break;
 +        }
 +        /* move to prev pos in this hash chain */
 +    } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0);
 +
 +    return (best_len <= s->lookahead)? best_len : s->lookahead;
 +}
 +
 +#endif
 +#endif
 diff --git a/deflate.c b/deflate.c
 index 36f99ac..4c42259 100644
 --- a/deflate.c
 +++ b/deflate.c
@@ -50,9 +50,6 @@
 /* @(#) $Id$ */
 #include "deflate.h"
 -#if __ARM_NEON
 -#include "contrib/arm/neon_slide_hash.h"
 -#endif
 const char deflate_copyright[] =
    " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
@@ -196,6 +193,11 @@ local const config configuration_table[10] = {
     s->head[s->hash_size-1] = NIL; \
     zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
 +#if defined(ARM_NEON)
 +#include "contrib/arm/arm_longest_match.h"
 +#include "contrib/arm/neon_slide_hash.h"
 +#endif
 +
 /* ===========================================================================
  * Slide the hash table when sliding the window down (could be avoided with 32
  * bit values at the expense of memory usage). We slide even when level == 0 to
@@ -1244,6 +1246,9 @@ local uInt longest_match(s, cur_match)
     deflate_state *s;
     IPos cur_match;                             /* current match */
 {
 +#if defined(ARM_NEON)
 +    return arm_longest_match(s, cur_match);
 +#endif
     unsigned chain_length = s->max_chain_length;/* max hash chain length */
     register Bytef *scan = s->window + s->strstart; /* current string */
     register Bytef *match;                      /* matched string */
 -- 
 2.19.0
--- a/0003-arm64-specific-build-patch.patch
+++ b/0003-arm64-specific-build-patch.patch
@ -1,115 +0,0 @@
 From e0be75f8dce27a4e32196529df2a08dca791a286 Mon Sep 17 00:00:00 2001
 From: Jeremy Linton <jeremy.linton@arm.com>
 Date: Fri, 6 Apr 2018 11:46:42 -0500
 Subject: [PATCH 3/3] arm64 specific build patch
 ---
 Makefile.in           | 19 ++++++++++++-------
 configure             |  2 +-
 contrib/minizip/zip.c |  6 ++++--
 3 files changed, 17 insertions(+), 10 deletions(-)
 diff --git a/Makefile.in b/Makefile.in
 index 5a77949..9f088e5 100644
 --- a/Makefile.in
 +++ b/Makefile.in
@@ -57,7 +57,7 @@ SRCDIR=
 ZINC=
 ZINCOUT=-I.
 -OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inflate.o inftrees.o trees.o zutil.o
 +OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inffast.o inflate.o inftrees.o trees.o zutil.o
 OBJG = compress.o uncompr.o gzclose.o gzlib.o gzread.o gzwrite.o
 OBJC = $(OBJZ) $(OBJG)
@@ -163,16 +163,16 @@ crc32.o: $(SRCDIR)crc32.c
 	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
 deflate.o: $(SRCDIR)deflate.c
 -	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
 +	$(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)deflate.c
 infback.o: $(SRCDIR)infback.c
 	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)infback.c
 inffast.o: $(SRCDIR)inffast.c
 -	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inffast.c
 +	$(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)inffast.c
 inflate.o: $(SRCDIR)inflate.c
 -	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inflate.c
 +	$(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)inflate.c
 inftrees.o: $(SRCDIR)inftrees.c
 	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inftrees.c
@@ -214,7 +214,7 @@ crc32.lo: $(SRCDIR)crc32.c
 deflate.lo: $(SRCDIR)deflate.c
 	-@mkdir objs 2>/dev/null || test -d objs
 -	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
 +	$(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
 	-@mv objs/deflate.o $@
 infback.lo: $(SRCDIR)infback.c
@@ -222,14 +222,19 @@ infback.lo: $(SRCDIR)infback.c
 	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/infback.o $(SRCDIR)infback.c
 	-@mv objs/infback.o $@
 +arminffast.lo: $(SRCDIR)contrib/arm/inffast_chunk.c $(SRCDIR)inffast.c
 +	-@mkdir objs 2>/dev/null || test -d objs
 +	$(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/arminffast.o $(SRCDIR)contrib/arm/inffast_chunk.c
 +	-@mv objs/arminffast.o $@
 +
 inffast.lo: $(SRCDIR)inffast.c
 	-@mkdir objs 2>/dev/null || test -d objs
 -	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c
 +	$(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c
 	-@mv objs/inffast.o $@
 inflate.lo: $(SRCDIR)inflate.c
 	-@mkdir objs 2>/dev/null || test -d objs
 -	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c
 +	$(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c
 	-@mv objs/inflate.o $@
 inftrees.lo: $(SRCDIR)inftrees.c
 diff --git a/configure b/configure
 index e974d1f..0c5f837 100755
 --- a/configure
 +++ b/configure
@@ -23,7 +23,7 @@ SRCDIR=`dirname $0`
 if test $SRCDIR = "."; then
     ZINC=""
     ZINCOUT="-I."
 -    SRCDIR=""
 +    SRCDIR="./"
 else
     ZINC='-include zconf.h'
     ZINCOUT='-I. -I$(SRCDIR)'
 diff --git a/contrib/minizip/zip.c b/contrib/minizip/zip.c
 index 44e88a9..0517930 100644
 --- a/contrib/minizip/zip.c
 +++ b/contrib/minizip/zip.c
@@ -519,15 +519,17 @@ local ZPOS64_T zip64local_SearchCentralDir(const zlib_filefunc64_32_def* pzlib_f
       break;
     for (i=(int)uReadSize-3; (i--)>0;)
 +    {
       if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) &&
         ((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06))
       {
         uPosFound = uReadPos+i;
         break;
       }
 +    }
 -      if (uPosFound!=0)
 -        break;
 +    if (uPosFound!=0)
 +      break;
   }
   TRYFREE(buf);
   return uPosFound;
 -- 
 2.19.0
--- a/0005-Accelerate-Adler32-using-arm64-SVE-instructions.patch
+++ b/0005-Accelerate-Adler32-using-arm64-SVE-instructions.patch
@ -1,175 +0,0 @@
 From 41ebac8b7d7485a5396ae25ce2412cafcd03f1a2 Mon Sep 17 00:00:00 2001
 From: liqiang <liqiang64@huawei.com>
 Date: Thu, 2 Sep 2021 17:31:48 +0800
 Subject: [PATCH] Accelerate Adler32 using arm64 SVE instructions
 	This patch uses the SVE instruction set to rewrite the Adler32
 	algorithm (checksum algorithm in libz). By dividing the data into
 	blocks, a vector operation can complete a data block in parallel.
 	Measured on a Taishan 1951 machine that supports 256bit width SVE,
 	this algorithm is about 3~5 times faster than the algorithm implemented
 	in C language in libz. The wider the bit width, the better the
 	acceleration effect. Below are the results of my measured random
 	data of 1M and 10M:
 		[root@xxx adler32]# ./benchmark 1000000
 		Libz alg: Time used:    608 us, 1644.7 Mb/s.
 		SVE  alg: Time used:    166 us, 6024.1 Mb/s.
 		[root@xxx adler32]# ./benchmark 10000000
 		Libz alg: Time used:   6484 us, 1542.3 Mb/s.
 		SVE  alg: Time used:   2034 us, 4916.4 Mb/s.
 	On machines that support ARM64 sve instructions, this algorithm can
 	effectively accelerate adler32, thereby achieving the effect of improving
 	the performance of the basic compression algorithm libz.
 	In the implementation of this patch, blocks can be of any size, so the
 	algorithm can automatically adapt to SVE hardware with different bit
 	widths without modifying the code.
 Signed-off-by: liqiang <liqiang64@huawei.com>
 ---
 contrib/arm/adler32_sve.S | 129 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 contrib/arm/adler32_sve.S
 diff --git a/contrib/arm/adler32_sve.S b/contrib/arm/adler32_sve.S
 new file mode 100644
 index 0000000..97c5930
 --- /dev/null
 +++ b/contrib/arm/adler32_sve.S
@@ -0,0 +1,129 @@
 +/******************************************************************************
 + * Copyright (c) Huawei Technologies Co., Ltd. 2018-2020. All rights reserved.
 + * iSulad licensed under the Mulan PSL v2.
 + * You can use this software according to the terms and conditions of the Mulan PSL v2.
 + * You may obtain a copy of Mulan PSL v2 at:
 + *     http://license.coscl.org.cn/MulanPSL2
 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 + * PURPOSE.
 + * See the Mulan PSL v2 for more details.
 + * Author: liqiang
 + * Create: 2020-07-13
 + * Description: Use SVE instruction to optimize adler32 algorithm.
 + * Enhancement: 2020-10-13 
 +                Automatically support different SVE vector length(128~2048). 
 + ******************************************************************************/
 +
 +.file "adler32_sve.S"
 +.text
 +.align 4
 +
 +//The supported sve vector length range is 128~2048 by this Adler_sequence
 +.Adler_sequence:
 +    .short 256,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241,240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225,224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209,208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193,192,191,190,189,188,187,186,185,184,183,182,181,180,179,178,177,176,175,174,173,172,171,170,169,168,167,166,165,164,163,162,161,160,159,158,157,156,155,154,153,152,151,150,149,148,147,146,145,144,143,142,141,140,139,138,137,136,135,134,133,132,131,130,129,128,127,126,125,124,123,122,121,120,119,118,117,116,115,114,113,112,111,110,109,108,107,106,105,104,103,102,101,100,99,98,97,96,95,94,93,92,91,90,89,88,87,86,85,84,83,82,81,80,79,78,77,76,75,74,73,72,71,70,69,68,67,66,65,64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1
 +
 +.global adler32_sve
 +.type adler32_sve, %function
 +adler32_sve:
 +    // x0: unsigned long adler
 +    // x1: const unsigned char *buf
 +    // x2: unsigned long len
 +    
 +    // w10 : A = adler & 0xffff
 +    // w11 : B = (adler >> 16) & 0xffff
 +    // first byte A = 1, B = 0
 +    and w10, w0, #0xffff
 +    lsr w11, w0, #16
 +    // less than and equal 63byte, jumper to normal proc
 +    cmp x2, #0x3f
 +    b.le Lnormal_proc
 +    
 +    // Get the length of the sve vector to x6.
 +    mov x6, #0
 +    addvl x6, x6, #1
 +    adr x12, .Adler_sequence
 +    ptrue p0.h
 +
 +    // Get the starting position of the required sequence.
 +    mov x9, #256
 +    sub x9, x9, x6
 +    ld1h z24.h, p0/z, [x12, x9, lsl #1] // taps1 to z24.h
 +    inch x9
 +    ld1h z25.h, p0/z, [x12, x9, lsl #1] // taps2 to z25.h
 +    // must bigger than 64byte 
 +    ptrue p0.b
 +    ptrue p1.h
 +    mov x9, #0
 +.align 4
 +LBig_loop:
 +    // x is SVE vector length.
 +    // Bn = Bn-1 + An-1 * x + x * D1 + (x-1) * D2 + ... + 1 * Dx
 +    // An = An-1 + D1 + D2 + D3 + ... + Dx
 +    
 +    .macro ADLER_BLOCK_32
 +    ld1b z0.b, p0/z, [x1, x9]
 +    
 +    uaddv d20, p0, z0.b // D1 + D2 + ... + D32
 +    mov x12, v20.2d[0]  // mov sum to w12
 +    madd x11, x10, x6, x11 // Bn = An-1 * 32 + Bn-1
 +
 +    uunpklo z26.h, z0.b
 +    uunpkhi z27.h, z0.b
 +    mul z26.h, p1/m, z26.h, z24.h // x * D1 + (x-1) * D2 + ... + (x/2 + 1) * D(x/2)
 +    mul z27.h, p1/m, z27.h, z25.h // (x/2) * D(x/2 + 1) + (x/2 - 1) * D(x/2 + 2) + ... + 1 * Dx
 +
 +    uaddv d21, p1, z26.h
 +    uaddv d22, p1, z27.h
 +    mov x13, v21.2d[0]
 +    mov x14, v22.2d[0]
 +
 +    add x11, x13, x11
 +    add x11, x14, x11     // Bn += x * D1 + (x-1) * D2 + ... + 1 * Dx
 +    add x10, x12, x10     // An += D1 + D2 + ... + Dx
 +    incb x9
 +    .endm
 +    mov x15, #4
 +    ADLER_BLOCK_32
 +    ADLER_BLOCK_32
 +    ADLER_BLOCK_32
 +    ADLER_BLOCK_32
 +    
 +    // calc = reg0 % 65521
 +    .macro mod65521, reg0, reg1, reg2
 +    mov w\reg1, #0x8071
 +    mov w\reg2, #0xfff1
 +    movk w\reg1, #0x8007, lsl #16
 +    umull x\reg1, w\reg0, w\reg1
 +    lsr x\reg1, x\reg1, #47
 +    msub w\reg0, w\reg1, w\reg2, w\reg0
 +    .endm
 +    
 +    mod65521 10, 14, 16
 +    mod65521 11, 14, 16
 +
 +Lloop_cond:
 +    mul x12, x6, x15
 +    sub x2, x2, x12
 +    cmp x2, x12
 +    b.ge LBig_loop
 +
 +Lnormal_proc:
 +    cmp x2, #0
 +    b.eq Lret
 +
 +    ldrb w15, [x1, x9]
 +    add x9, x9, #1
 +    add x10, x15, x10
 +    add x11, x10, x11
 +    sub x2, x2, #1
 +    b Lnormal_proc
 +
 +Lret:
 +    mod65521 10, 14, 5
 +    mod65521 11, 14, 5
 +    lsl x11, x11, #16
 +    orr x0, x10, x11
 +    ret
 +
 +.size adler32_sve, .-adler32_sve
 -- 
 2.17.1
--- a/backport-0001-CVE-2018-25032.patch
+++ b/backport-0001-CVE-2018-25032.patch
@ -1,346 +0,0 @@
 From 5c44459c3b28a9bd3283aaceab7c615f8020c531 Mon Sep 17 00:00:00 2001
 From: Mark Adler <madler@alumni.caltech.edu>
 Date: Tue, 17 Apr 2018 22:09:22 -0700
 Subject: [PATCH] Fix a bug that can crash deflate on some input when using
 Z_FIXED.
 This bug was reported by Danilo Ramos of Eideticom, Inc. It has
 lain in wait 13 years before being found! The bug was introduced
 in zlib 1.2.2.2, with the addition of the Z_FIXED option. That
 option forces the use of fixed Huffman codes. For rare inputs with
 a large number of distant matches, the pending buffer into which
 the compressed data is written can overwrite the distance symbol
 table which it overlays. That results in corrupted output due to
 invalid distances, and can result in out-of-bound accesses,
 crashing the application.
 The fix here combines the distance buffer and literal/length
 buffers into a single symbol buffer. Now three bytes of pending
 buffer space are opened up for each literal or length/distance
 pair consumed, instead of the previous two bytes. This assures
 that the pending buffer cannot overwrite the symbol table, since
 the maximum fixed code compressed length/distance is 31 bits, and
 since there are four bytes of pending space for every three bytes
 of symbol space.
 ---
 deflate.c | 74 ++++++++++++++++++++++++++++++++++++++++---------------
 deflate.h | 25 +++++++++----------
 trees.c   | 49 +++++++++++-------------------------
 3 files changed, 79 insertions(+), 69 deletions(-)
 diff --git a/deflate.c b/deflate.c
 index ef981b0..3db6d75 100644
 --- a/deflate.c
 +++ b/deflate.c
@@ -269,11 +269,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
     int wrap = 1;
     static const char my_version[] = ZLIB_VERSION;
 -    ushf *overlay;
 -    /* We overlay pending_buf and d_buf+l_buf. This works since the average
 -     * output size for (length,distance) codes is <= 24 bits.
 -     */
 -
     if (version == Z_NULL || version[0] != my_version[0] ||
         stream_size != sizeof(z_stream)) {
         return Z_VERSION_ERROR;
@@ -344,9 +339,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
     s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
 -    overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
 -    s->pending_buf = (uchf *) overlay;
 -    s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
 +    /* We overlay pending_buf and sym_buf. This works since the average size
 +     * for length/distance pairs over any compressed block is assured to be 31
 +     * bits or less.
 +     *
 +     * Analysis: The longest fixed codes are a length code of 8 bits plus 5
 +     * extra bits, for lengths 131 to 257. The longest fixed distance codes are
 +     * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest
 +     * possible fixed-codes length/distance pair is then 31 bits total.
 +     *
 +     * sym_buf starts one-fourth of the way into pending_buf. So there are
 +     * three bytes in sym_buf for every four bytes in pending_buf. Each symbol
 +     * in sym_buf is three bytes -- two for the distance and one for the
 +     * literal/length. As each symbol is consumed, the pointer to the next
 +     * sym_buf value to read moves forward three bytes. From that symbol, up to
 +     * 31 bits are written to pending_buf. The closest the written pending_buf
 +     * bits gets to the next sym_buf symbol to read is just before the last
 +     * code is written. At that time, 31*(n-2) bits have been written, just
 +     * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at
 +     * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1
 +     * symbols are written.) The closest the writing gets to what is unread is
 +     * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and
 +     * can range from 128 to 32768.
 +     *
 +     * Therefore, at a minimum, there are 142 bits of space between what is
 +     * written and what is read in the overlain buffers, so the symbols cannot
 +     * be overwritten by the compressed data. That space is actually 139 bits,
 +     * due to the three-bit fixed-code block header.
 +     *
 +     * That covers the case where either Z_FIXED is specified, forcing fixed
 +     * codes, or when the use of fixed codes is chosen, because that choice
 +     * results in a smaller compressed block than dynamic codes. That latter
 +     * condition then assures that the above analysis also covers all dynamic
 +     * blocks. A dynamic-code block will only be chosen to be emitted if it has
 +     * fewer bits than a fixed-code block would for the same set of symbols.
 +     * Therefore its average symbol length is assured to be less than 31. So
 +     * the compressed data for a dynamic block also cannot overwrite the
 +     * symbols from which it is being constructed.
 +     */
 +
 +    s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4);
 +    s->pending_buf_size = (ulg)s->lit_bufsize * 4;
     if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
         s->pending_buf == Z_NULL) {
@@ -355,8 +388,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
         deflateEnd (strm);
         return Z_MEM_ERROR;
     }
 -    s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
 -    s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
 +    s->sym_buf = s->pending_buf + s->lit_bufsize;
 +    s->sym_end = (s->lit_bufsize - 1) * 3;
 +    /* We avoid equality with lit_bufsize*3 because of wraparound at 64K
 +     * on 16 bit machines and because stored blocks are restricted to
 +     * 64K-1 bytes.
 +     */
     s->level = level;
     s->strategy = strategy;
@@ -567,7 +604,7 @@ int ZEXPORT deflatePrime (strm, bits, value)
     if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
     s = strm->state;
 -    if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3))
 +    if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
         return Z_BUF_ERROR;
     do {
         put = Buf_size - s->bi_valid;
@@ -1126,7 +1163,6 @@ int ZEXPORT deflateCopy (dest, source)
 #else
     deflate_state *ds;
     deflate_state *ss;
 -    ushf *overlay;
     if (deflateStateCheck(source) || dest == Z_NULL) {
@@ -1146,8 +1182,7 @@ int ZEXPORT deflateCopy (dest, source)
     ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
     ds->prev   = (Posf *)  ZALLOC(dest, ds->w_size, sizeof(Pos));
     ds->head   = (Posf *)  ZALLOC(dest, ds->hash_size, sizeof(Pos));
 -    overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
 -    ds->pending_buf = (uchf *) overlay;
 +    ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4);
     if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
         ds->pending_buf == Z_NULL) {
@@ -1161,8 +1196,7 @@ int ZEXPORT deflateCopy (dest, source)
     zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
     ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
 -    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
 -    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
 +    ds->sym_buf = ds->pending_buf + ds->lit_bufsize;
     ds->l_desc.dyn_tree = ds->dyn_ltree;
     ds->d_desc.dyn_tree = ds->dyn_dtree;
@@ -1934,7 +1968,7 @@ local block_state deflate_fast(s, flush)
         FLUSH_BLOCK(s, 1);
         return finish_done;
     }
 -    if (s->last_lit)
 +    if (s->sym_next)
         FLUSH_BLOCK(s, 0);
     return block_done;
 }
@@ -2065,7 +2099,7 @@ local block_state deflate_slow(s, flush)
         FLUSH_BLOCK(s, 1);
         return finish_done;
     }
 -    if (s->last_lit)
 +    if (s->sym_next)
         FLUSH_BLOCK(s, 0);
     return block_done;
 }
@@ -2140,7 +2174,7 @@ local block_state deflate_rle(s, flush)
         FLUSH_BLOCK(s, 1);
         return finish_done;
     }
 -    if (s->last_lit)
 +    if (s->sym_next)
         FLUSH_BLOCK(s, 0);
     return block_done;
 }
@@ -2179,7 +2213,7 @@ local block_state deflate_huff(s, flush)
         FLUSH_BLOCK(s, 1);
         return finish_done;
     }
 -    if (s->last_lit)
 +    if (s->sym_next)
         FLUSH_BLOCK(s, 0);
     return block_done;
 }
 diff --git a/deflate.h b/deflate.h
 index 23ecdd3..d4cf1a9 100644
 --- a/deflate.h
 +++ b/deflate.h
@@ -217,7 +217,7 @@ typedef struct internal_state {
     /* Depth of each subtree used as tie breaker for trees of equal frequency
      */
 -    uchf *l_buf;          /* buffer for literals or lengths */
 +    uchf *sym_buf;        /* buffer for distances and literals/lengths */
     uInt  lit_bufsize;
     /* Size of match buffer for literals/lengths.  There are 4 reasons for
@@ -239,13 +239,8 @@ typedef struct internal_state {
      *   - I can't count above 4
      */
 -    uInt last_lit;      /* running index in l_buf */
 -
 -    ushf *d_buf;
 -    /* Buffer for distances. To simplify the code, d_buf and l_buf have
 -     * the same number of elements. To use different lengths, an extra flag
 -     * array would be necessary.
 -     */
 +    uInt sym_next;      /* running index in sym_buf */
 +    uInt sym_end;       /* symbol table full when sym_next reaches this */
     ulg opt_len;        /* bit length of current block with optimal trees */
     ulg static_len;     /* bit length of current block with static trees */
@@ -325,20 +320,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
 # define _tr_tally_lit(s, c, flush) \
   { uch cc = (c); \
 -    s->d_buf[s->last_lit] = 0; \
 -    s->l_buf[s->last_lit++] = cc; \
 +    s->sym_buf[s->sym_next++] = 0; \
 +    s->sym_buf[s->sym_next++] = 0; \
 +    s->sym_buf[s->sym_next++] = cc; \
     s->dyn_ltree[cc].Freq++; \
 -    flush = (s->last_lit == s->lit_bufsize-1); \
 +    flush = (s->sym_next == s->sym_end); \
    }
 # define _tr_tally_dist(s, distance, length, flush) \
   { uch len = (uch)(length); \
     ush dist = (ush)(distance); \
 -    s->d_buf[s->last_lit] = dist; \
 -    s->l_buf[s->last_lit++] = len; \
 +    s->sym_buf[s->sym_next++] = dist; \
 +    s->sym_buf[s->sym_next++] = dist >> 8; \
 +    s->sym_buf[s->sym_next++] = len; \
     dist--; \
     s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
     s->dyn_dtree[d_code(dist)].Freq++; \
 -    flush = (s->last_lit == s->lit_bufsize-1); \
 +    flush = (s->sym_next == s->sym_end); \
   }
 #else
 # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
 diff --git a/trees.c b/trees.c
 index 50cf4b4..8bf5c80 100644
 --- a/trees.c
 +++ b/trees.c
@@ -416,7 +416,7 @@ local void init_block(s)
     s->dyn_ltree[END_BLOCK].Freq = 1;
     s->opt_len = s->static_len = 0L;
 -    s->last_lit = s->matches = 0;
 +    s->sym_next = s->matches = 0;
 }
 #define SMALLEST 1
@@ -947,7 +947,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
         Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
                 opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
 -                s->last_lit));
 +	        s->sym_next / 3));
         if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
@@ -1016,8 +1016,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
     unsigned dist;  /* distance of matched string */
     unsigned lc;    /* match length-MIN_MATCH or unmatched char (if dist==0) */
 {
 -    s->d_buf[s->last_lit] = (ush)dist;
 -    s->l_buf[s->last_lit++] = (uch)lc;
 +    s->sym_buf[s->sym_next++] = dist;
 +    s->sym_buf[s->sym_next++] = dist >> 8;
 +    s->sym_buf[s->sym_next++] = lc;
     if (dist == 0) {
         /* lc is the unmatched char */
         s->dyn_ltree[lc].Freq++;
@@ -1033,29 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
         s->dyn_dtree[d_code(dist)].Freq++;
     }
 -#ifdef TRUNCATE_BLOCK
 -    /* Try to guess if it is profitable to stop the current block here */
 -    if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
 -        /* Compute an upper bound for the compressed length */
 -        ulg out_length = (ulg)s->last_lit*8L;
 -        ulg in_length = (ulg)((long)s->strstart - s->block_start);
 -        int dcode;
 -        for (dcode = 0; dcode < D_CODES; dcode++) {
 -            out_length += (ulg)s->dyn_dtree[dcode].Freq *
 -                (5L+extra_dbits[dcode]);
 -        }
 -        out_length >>= 3;
 -        Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
 -               s->last_lit, in_length, out_length,
 -               100L - out_length*100L/in_length));
 -        if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
 -    }
 -#endif
 -    return (s->last_lit == s->lit_bufsize-1);
 -    /* We avoid equality with lit_bufsize because of wraparound at 64K
 -     * on 16 bit machines and because stored blocks are restricted to
 -     * 64K-1 bytes.
 -     */
 +    return (s->sym_next == s->sym_end);
 }
 /* ===========================================================================
@@ -1068,13 +1047,14 @@ local void compress_block(s, ltree, dtree)
 {
     unsigned dist;      /* distance of matched string */
     int lc;             /* match length or unmatched char (if dist == 0) */
 -    unsigned lx = 0;    /* running index in l_buf */
 +    unsigned sx = 0;    /* running index in sym_buf */
     unsigned code;      /* the code to send */
     int extra;          /* number of extra bits to send */
 -    if (s->last_lit != 0) do {
 -        dist = s->d_buf[lx];
 -        lc = s->l_buf[lx++];
 +   if (s->sym_next != 0) do {
 +        dist = s->sym_buf[sx++] & 0xff;
 +        dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
 +        lc = s->sym_buf[sx++];
         if (dist == 0) {
             send_code(s, lc, ltree); /* send a literal byte */
             Tracecv(isgraph(lc), (stderr," '%c' ", lc));
@@ -1099,11 +1079,10 @@ local void compress_block(s, ltree, dtree)
             }
         } /* literal or match pair ? */
 -        /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
 -        Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
 -               "pendingBuf overflow");
 +        /* Check that the overlay between pending_buf and sym_buf is ok: */
 +        Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
 -    } while (lx < s->last_lit);
 +    } while (sx < s->sym_next);
     send_code(s, END_BLOCK, ltree);
 }
 -- 
 2.27.0
--- a/backport-0001-CVE-2022-37434.patch
+++ b/backport-0001-CVE-2022-37434.patch
@ -1,35 +0,0 @@
 From eff308af425b67093bab25f80f1ae950166bece1 Mon Sep 17 00:00:00 2001
 From: Mark Adler <fork@madler.net>
 Date: Sat, 30 Jul 2022 15:51:11 -0700
 Subject: [PATCH] Fix a bug when getting a gzip header extra field with
 inflate().
 If the extra field was larger than the space the user provided with
 inflateGetHeader(), and if multiple calls of inflate() delivered
 the extra header data, then there could be a buffer overflow of the
 provided space. This commit assures that provided space is not
 exceeded.
 ---
 inflate.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
 diff --git a/inflate.c b/inflate.c
 index 2a0ac30..95a38f5 100644
 --- a/inflate.c
 +++ b/inflate.c
@@ -765,9 +765,10 @@ int flush;
                 copy = state->length;
                 if (copy > have) copy = have;
                 if (copy) {
 +                    len = state->head->extra_len - state->length;
                     if (state->head != Z_NULL &&
 -                        state->head->extra != Z_NULL) {
 -                        len = state->head->extra_len - state->length;
 +                        state->head->extra != Z_NULL &&
 +                        len < state->head->extra_max) {
                         zmemcpy(state->head->extra + len, next,
                                 len + copy > state->head->extra_max ?
                                 state->head->extra_max - len : copy);
 -- 
 2.27.0
--- a/backport-0002-CVE-2018-25032.patch
+++ b/backport-0002-CVE-2018-25032.patch
@ -1,26 +0,0 @@
 From 4346a16853e19b45787ce933666026903fb8f3f8 Mon Sep 17 00:00:00 2001
 From: Mark Adler <madler@alumni.caltech.edu>
 Date: Tue, 17 Apr 2018 22:44:41 -0700
 Subject: [PATCH] Assure that the number of bits for deflatePrime() is valid.
 ---
 deflate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
 diff --git a/deflate.c b/deflate.c
 index 3db6d75..a4b0977 100644
 --- a/deflate.c
 +++ b/deflate.c
@@ -604,7 +604,8 @@ int ZEXPORT deflatePrime (strm, bits, value)
     if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
     s = strm->state;
 -    if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
 +    if (bits < 0 || bits > 16 ||
 +        s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
         return Z_BUF_ERROR;
     do {
         put = Buf_size - s->bi_valid;
 -- 
 2.27.0
--- a/backport-0002-CVE-2022-37434.patch
+++ b/backport-0002-CVE-2022-37434.patch
@ -1,32 +0,0 @@
 From 1eb7682f845ac9e9bf9ae35bbfb3bad5dacbd91d Mon Sep 17 00:00:00 2001
 From: Mark Adler <fork@madler.net>
 Date: Mon, 8 Aug 2022 10:50:09 -0700
 Subject: [PATCH] Fix extra field processing bug that dereferences NULL
 state->head.
 The recent commit to fix a gzip header extra field processing bug
 introduced the new bug fixed here.
 ---
 inflate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 diff --git a/inflate.c b/inflate.c
 index 95a38f5..9c5934e 100644
 --- a/inflate.c
 +++ b/inflate.c
@@ -765,10 +765,10 @@ int flush;
                 copy = state->length;
                 if (copy > have) copy = have;
                 if (copy) {
 -                    len = state->head->extra_len - state->length;
                     if (state->head != Z_NULL &&
                         state->head->extra != Z_NULL &&
 -                        len < state->head->extra_max) {
 +                        (len = state->head->extra_len - state->length) <
 +                            state->head->extra_max) {
                         zmemcpy(state->head->extra + len, next,
                                 len + copy > state->head->extra_max ?
                                 state->head->extra_max - len : copy);
 -- 
 2.27.0
--- a/backport-fix-undefined-buffer-detected-by-oss-fuzz.patch
+++ b/backport-fix-undefined-buffer-detected-by-oss-fuzz.patch
--- a/backport-zlib-1.2.5-minizip-fixuncrypt.patch
+++ b/backport-zlib-1.2.5-minizip-fixuncrypt.patch
--- a/zlib-1.2.11-SIMD.patch
+++ b/zlib-1.2.11-SIMD.patch
@ -1,26 +1,247 @@
 From 91c1e78feec94739cc5da8562b3e2395bfdf6193 Mon Sep 17 00:00:00 2001
 From: hedongbo <hedongbo@huawei.com>
 Date: Sun, 14 Sep 2020 15:36:12 +0800
 Subject: [PATCH] zlib-1.2.11-SIMD.patch
 In the sampling of the Hive test program, it is found that inflate occupies a high proportion.
 The zlib is optimized through instruction set optimization, hash replacement, and compilation option optimization. 
 The inflate and deflate processes of the Zlib library provided by the JDK are optimized to shorten the invoking time.
 ---
 CMakeLists.txt |   6 +
 adler32.c      | 169 +++++++++++++++++++++-
 deflate.c      |  22 ++-
 inffast.c      |  62 ++++++++-
 inffast.h      | 370 +++++++++++++++++++++++++++++++++++++++++++++++++
 inflate.c      |   7 +
 6 files changed, 627 insertions(+), 9 deletions(-)
 diff --git a/CMakeLists.txt b/CMakeLists.txt
 index b412dc7..40dc533 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
@@ -126,6 +126,12 @@ if(NOT MINGW)
     )
 endif()
 +if(CMAKE_COMPILER_IS_GNUCC)
 +    if(ARM_NEON)
 +         add_definitions(-DHASH_ARMV8_CRC32 -march=armv8-a+crc -DUNALIGNED_OK -DADLER32_SIMD_NEON -DINFLATE_CHUNK_SIMD_NEON -O3)
 +    endif()
 +endif()
 +
 # parse the full version number from zlib.h and include in ZLIB_FULL_VERSION
 file(READ ${CMAKE_CURRENT_SOURCE_DIR}/zlib.h _zlib_h_contents)
 string(REGEX REPLACE ".*#define[ \t]+ZLIB_VERSION[ \t]+\"([-0-9A-Za-z.]+)\".*"
 diff --git a/adler32.c b/adler32.c
 index d0be438..6ced75d 100644
 --- a/adler32.c
 +++ b/adler32.c
@@ -59,7 +59,169 @@ local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
 #  define MOD63(a) a %= BASE
 #endif
 -/* ========================================================================= */
 +#if defined(ADLER32_SIMD_NEON)
 +#include <arm_neon.h>
 +/*
 + * Multiply-add bytes by [ 32, 31, 30, ... ] for s2.
 + */
 +uint32x4_t ZLIB_INTERNAL mul_add_bytes(
 +    uint32x4_t v_s2,
 +    uint16x8_t v_column_sum_1,
 +    uint16x8_t v_column_sum_2,
 +    uint16x8_t v_column_sum_3,
 +    uint16x8_t v_column_sum_4)
 +{
 +    v_s2 = vshlq_n_u32(v_s2, 5);
 +
 +    v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_1),
 +        (uint16x4_t) { 32, 31, 30, 29 });
 +    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1),
 +        (uint16x4_t) { 28, 27, 26, 25 });
 +    v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_2),
 +        (uint16x4_t) { 24, 23, 22, 21 });
 +    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2),
 +        (uint16x4_t) { 20, 19, 18, 17 });
 +    v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_3),
 +        (uint16x4_t) { 16, 15, 14, 13 });
 +    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3),
 +        (uint16x4_t) { 12, 11, 10,  9 });
 +    v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_4),
 +        (uint16x4_t) {  8,  7,  6,  5 });
 +    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4),
 +        (uint16x4_t) {  4,  3,  2,  1 });
 +    return v_s2;
 +}
 +
 +/*
 + * Handle leftover data.
 + */
 +uLong ZLIB_INTERNAL leftover_handler(uint32_t s1, uint32_t s2, const Bytef *buf, z_size_t len)
 +{
 +    if (len) {
 +        if (len >= 16) {
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +
 +            len -= 16;
 +        }
 +
 +        while (len--) {
 +            s2 += (s1 += *buf++);
 +        }
 +
 +        if (s1 >= BASE)
 +            s1 -= BASE;
 +        s2 %= BASE;
 +    }
 +
 +    /*
 +     * Return the recombined sums.
 +     */
 +    return s1 | (s2 << 16);
 +}
 +
 +uLong ZLIB_INTERNAL adler32_simd_(uLong adler, const Bytef *buf, z_size_t len)
 +{
 +    /*
 +     * Split Adler-32 into component sums.
 +     */
 +    uint32_t s1 = adler & 0xffff;
 +    uint32_t s2 = adler >> 16;
 +    /*
 +     * Serially compute s1 & s2, until the data is 16-byte aligned.
 +     */
 +    if ((uintptr_t)buf & 0xf) {
 +        while ((uintptr_t)buf & 0xf) {
 +            s2 += (s1 += *buf++);
 +            --len;
 +        }
 +        if (s1 >= BASE)
 +            s1 -= BASE;
 +        s2 %= BASE;
 +    }
 +    /*
 +     * Process the data in blocks.
 +     */
 +    const unsigned BLOCK_SIZE = 1 << 5;
 +    z_size_t blocks = len / BLOCK_SIZE;
 +    len -= blocks * BLOCK_SIZE;
 +    while (blocks) {
 +        unsigned n = NMAX / BLOCK_SIZE;  /* The NMAX constraint. */
 +        if (n > blocks)
 +            n = (unsigned) blocks;
 +        blocks -= n;
 +        /*
 +         * Process n blocks of data. At most NMAX data bytes can be
 +         * processed before s2 must be reduced modulo BASE.
 +         */
 +        uint32x4_t v_s2 = (uint32x4_t) { 0, 0, 0, s1 * n };
 +        uint32x4_t v_s1 = (uint32x4_t) { 0, 0, 0, 0 };
 +
 +        uint16x8_t v_column_sum_1 = vdupq_n_u16(0);
 +        uint16x8_t v_column_sum_2 = vdupq_n_u16(0);
 +        uint16x8_t v_column_sum_3 = vdupq_n_u16(0);
 +        uint16x8_t v_column_sum_4 = vdupq_n_u16(0);
 +        do {
 +            /*
 +             * Load 32 input bytes.
 +             */
 +            const uint8x16_t bytes1 = vld1q_u8((uint8_t*)(buf));
 +            const uint8x16_t bytes2 = vld1q_u8((uint8_t*)(buf + 16));
 +            /*
 +             * Add previous block byte sum to v_s2.
 +             */
 +            v_s2 = vaddq_u32(v_s2, v_s1);
 +            /*
 +             * Horizontally add the bytes for s1.
 +             */
 +            v_s1 = vpadalq_u16(v_s1, vpadalq_u8(vpaddlq_u8(bytes1), bytes2));
 +            /*
 +             * Vertically add the bytes for s2.
 +             */
 +            v_column_sum_1 = vaddw_u8(v_column_sum_1, vget_low_u8 (bytes1));
 +            v_column_sum_2 = vaddw_u8(v_column_sum_2, vget_high_u8(bytes1));
 +            v_column_sum_3 = vaddw_u8(v_column_sum_3, vget_low_u8 (bytes2));
 +            v_column_sum_4 = vaddw_u8(v_column_sum_4, vget_high_u8(bytes2));
 +            buf += BLOCK_SIZE;
 +        } while (--n);
 +        v_s2 = mul_add_bytes(v_s2, v_column_sum_1, v_column_sum_2, v_column_sum_3, v_column_sum_4);
 +        /*
 +         * Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
 +         */
 +        uint32x2_t sum1 = vpadd_u32(vget_low_u32(v_s1), vget_high_u32(v_s1));
 +        uint32x2_t sum2 = vpadd_u32(vget_low_u32(v_s2), vget_high_u32(v_s2));
 +        uint32x2_t s1s2 = vpadd_u32(sum1, sum2);
 +
 +        s1 += vget_lane_u32(s1s2, 0);
 +        s2 += vget_lane_u32(s1s2, 1);
 +        /*
 +         * Reduce.
 +         */
 +        s1 %= BASE;
 +        s2 %= BASE;
 +    }
 +    return leftover_handler(s1, s2, buf, len);
 +
 +}
 +#endif
 +
 uLong ZEXPORT adler32_z(adler, buf, len)
     uLong adler;
     const Bytef *buf;
@@ -68,6 +230,11 @@ uLong ZEXPORT adler32_z(adler, buf, len)
     unsigned long sum2;
     unsigned n;
 +#if defined(ADLER32_SIMD_NEON)
 +    if (buf && len >= 64)
 +        return adler32_simd_(adler, buf, len);
 +#endif
 +
     /* split Adler-32 into component sums */
     sum2 = (adler >> 16) & 0xffff;
     adler &= 0xffff;
 diff --git a/deflate.c b/deflate.c
-index f30f71b..c018064 100644
+index f290783..31d1cfe 100644
 --- a/deflate.c
 +++ b/deflate.c
-@@ -184,8 +184,16 @@ local const config configuration_table[10] = {
+@@ -154,7 +154,16 @@ local const config configuration_table[10] = {
  *    characters, so that a running hash key can be computed from the previous
  *    key instead of complete recalculation each time.
  */
-#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
+-#define UPDATE_HASH(s,h,c) (h = (((h) << s->hash_shift) ^ (c)) & s->hash_mask)
 +#if defined(HASH_ARMV8_CRC32)
 +#include <arm_acle.h>
 +#define UPDATE_HASH_CRC_INTERNAL(s, h, c) \
-+	(h = __crc32w(0, (c) & 0xFFFFFF) & ((deflate_state *)s)->hash_mask)
+       (h = __crc32w(0, (c) & 0xFFFFFF) & ((deflate_state *)s)->hash_mask)
- 
+
 +#define UPDATE_HASH(s, h, c) \
 +    UPDATE_HASH_CRC_INTERNAL(s, h, *(unsigned *)((uintptr_t)(&c) - (MIN_MATCH-1)))
 +#else
 +#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
 +#endif
 /* ===========================================================================
-  * Insert string str in the dictionary and set match_head to the previous head
+@@ -1226,14 +1235,15 @@ local unsigned read_buf(strm, buf, size)
@@ -1198,14 +1247,15 @@ local unsigned read_buf(strm, buf, size)
     strm->avail_in  -= len;
     zmemcpy(buf, strm->next_in, len);
@ -42,10 +263,10 @@ index f30f71b..c018064 100644
     strm->total_in += len;
 diff --git a/inffast.c b/inffast.c
-index 4bfc995..2084739 100644
+index 1fec7f3..84c5aba 100644
 --- a/inffast.c
 +++ b/inffast.c
-@@ -81,6 +81,9 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
+@@ -57,6 +57,9 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
     unsigned char FAR *out;     /* local strm->next_out */
     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
     unsigned char FAR *end;     /* while out < end, enough space available */
@ -55,7 +276,7 @@ index 4bfc995..2084739 100644
 #ifdef INFLATE_STRICT
     unsigned dmax;              /* maximum distance from zlib header */
 #endif
-@@ -113,7 +116,12 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
+@@ -89,7 +92,12 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
 #endif
     wsize = state->wsize;
     whave = state->whave;
@ -68,7 +289,7 @@ index 4bfc995..2084739 100644
     window = state->window;
     hold = state->hold;
     bits = state->bits;
-@@ -221,6 +229,45 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
+@@ -197,6 +205,45 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
 #endif
                     }
                     from = window;
@ -114,7 +335,7 @@ index 4bfc995..2084739 100644
                     if (wnext == 0) {           /* very common case */
                         from += wsize - op;
                         if (op < len) {         /* some from window */
-@@ -271,8 +318,18 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
+@@ -247,8 +294,18 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
                         if (len > 1)
                             *out++ = *from++;
                     }
@ -134,7 +355,7 @@ index 4bfc995..2084739 100644
                     from = out - dist;          /* copy direct from output */
                     do {                        /* minimum length is three */
                         *out++ = *from++;
-@@ -284,7 +341,8 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
+@@ -260,7 +317,8 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
                         *out++ = *from++;
                         if (len > 1)
                             *out++ = *from++;
@ -145,10 +366,10 @@ index 4bfc995..2084739 100644
             }
             else if ((op & 64) == 0) {          /* 2nd level distance code */
 diff --git a/inffast.h b/inffast.h
-index b8da8bb..0def2e3 100644
+index e5c1aa4..259882c 100644
 --- a/inffast.h
 +++ b/inffast.h
-@@ -32,4 +32,374 @@
+@@ -8,4 +8,374 @@
    subject to change. Applications should only use zlib.h.
  */
@ -524,10 +745,10 @@ index b8da8bb..0def2e3 100644
 +
 +#endif //defined(INFLATE_CHUNK_SIMD_NEON)
 diff --git a/inflate.c b/inflate.c
-index ca904e7..c78e05b 100644
+index 8acbef4..4e695b1 100644
 --- a/inflate.c
 +++ b/inflate.c
-@@ -429,9 +429,16 @@ unsigned copy;
+@@ -408,9 +408,16 @@ unsigned copy;
     /* if it hasn't been done already, allocate space for the window */
     if (state->window == Z_NULL) {
@ -544,200 +765,6 @@ index ca904e7..c78e05b 100644
         if (state->window == Z_NULL) return 1;
     }
-diff --git a/adler32.c b/adler32.c
+-- 
-index e148022..e024a15 100644
+2.33.0
--- a/adler32.c
+
 +++ b/adler32.c
@@ -83,7 +83,169 @@ local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
 #  define MOD63(a) a %= BASE
 #endif
 -/* ========================================================================= */
 +#if defined(ADLER32_SIMD_NEON)
 +#include <arm_neon.h>
 +/*
 + * Multiply-add bytes by [ 32, 31, 30, ... ] for s2.
 + */
 +uint32x4_t ZLIB_INTERNAL mul_add_bytes(
 +    uint32x4_t v_s2,
 +    uint16x8_t v_column_sum_1,
 +    uint16x8_t v_column_sum_2,
 +    uint16x8_t v_column_sum_3,
 +    uint16x8_t v_column_sum_4)
 +{
 +    v_s2 = vshlq_n_u32(v_s2, 5);
 +
 +    v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_1),
 +        (uint16x4_t) { 32, 31, 30, 29 });
 +    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1),
 +        (uint16x4_t) { 28, 27, 26, 25 });
 +    v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_2),
 +        (uint16x4_t) { 24, 23, 22, 21 });
 +    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2),
 +        (uint16x4_t) { 20, 19, 18, 17 });
 +    v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_3),
 +        (uint16x4_t) { 16, 15, 14, 13 });
 +    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3),
 +        (uint16x4_t) { 12, 11, 10,  9 });
 +    v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_4),
 +        (uint16x4_t) {  8,  7,  6,  5 });
 +    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4),
 +        (uint16x4_t) {  4,  3,  2,  1 });
 +    return v_s2;
 +}
 +
 +/*
 + * Handle leftover data.
 + */
 +uLong ZLIB_INTERNAL leftover_handler(uint32_t s1, uint32_t s2, const Bytef *buf, z_size_t len)
 +{
 +    if (len) {
 +        if (len >= 16) {
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +            s2 += (s1 += *buf++);
 +
 +            len -= 16;
 +        }
 +
 +        while (len--) {
 +            s2 += (s1 += *buf++);
 +        }
 +
 +        if (s1 >= BASE)
 +            s1 -= BASE;
 +        s2 %= BASE;
 +    }
 +
 +    /*
 +     * Return the recombined sums.
 +     */
 +    return s1 | (s2 << 16);
 +}
 +
 +uLong ZLIB_INTERNAL adler32_simd_(uLong adler, const Bytef *buf, z_size_t len)
 +{
 +    /*
 +     * Split Adler-32 into component sums.
 +     */
 +    uint32_t s1 = adler & 0xffff;
 +    uint32_t s2 = adler >> 16;
 +    /*
 +     * Serially compute s1 & s2, until the data is 16-byte aligned.
 +     */
 +    if ((uintptr_t)buf & 0xf) {
 +        while ((uintptr_t)buf & 0xf) {
 +            s2 += (s1 += *buf++);
 +            --len;
 +        }
 +        if (s1 >= BASE)
 +            s1 -= BASE;
 +        s2 %= BASE;
 +    }
 +    /*
 +     * Process the data in blocks.
 +     */
 +    const unsigned BLOCK_SIZE = 1 << 5;
 +    z_size_t blocks = len / BLOCK_SIZE;
 +    len -= blocks * BLOCK_SIZE;
 +    while (blocks) {
 +        unsigned n = NMAX / BLOCK_SIZE;  /* The NMAX constraint. */
 +        if (n > blocks)
 +            n = (unsigned) blocks;
 +        blocks -= n;
 +        /*
 +         * Process n blocks of data. At most NMAX data bytes can be
 +         * processed before s2 must be reduced modulo BASE.
 +         */
 +        uint32x4_t v_s2 = (uint32x4_t) { 0, 0, 0, s1 * n };
 +        uint32x4_t v_s1 = (uint32x4_t) { 0, 0, 0, 0 };
 +
 +        uint16x8_t v_column_sum_1 = vdupq_n_u16(0);
 +        uint16x8_t v_column_sum_2 = vdupq_n_u16(0);
 +        uint16x8_t v_column_sum_3 = vdupq_n_u16(0);
 +        uint16x8_t v_column_sum_4 = vdupq_n_u16(0);
 +        do {
 +            /*
 +             * Load 32 input bytes.
 +             */
 +            const uint8x16_t bytes1 = vld1q_u8((uint8_t*)(buf));
 +            const uint8x16_t bytes2 = vld1q_u8((uint8_t*)(buf + 16));
 +            /*
 +             * Add previous block byte sum to v_s2.
 +             */
 +            v_s2 = vaddq_u32(v_s2, v_s1);
 +            /*
 +             * Horizontally add the bytes for s1.
 +             */
 +            v_s1 = vpadalq_u16(v_s1, vpadalq_u8(vpaddlq_u8(bytes1), bytes2));
 +            /*
 +             * Vertically add the bytes for s2.
 +             */
 +            v_column_sum_1 = vaddw_u8(v_column_sum_1, vget_low_u8 (bytes1));
 +            v_column_sum_2 = vaddw_u8(v_column_sum_2, vget_high_u8(bytes1));
 +            v_column_sum_3 = vaddw_u8(v_column_sum_3, vget_low_u8 (bytes2));
 +            v_column_sum_4 = vaddw_u8(v_column_sum_4, vget_high_u8(bytes2));
 +            buf += BLOCK_SIZE;
 +        } while (--n);
 +        v_s2 = mul_add_bytes(v_s2, v_column_sum_1, v_column_sum_2, v_column_sum_3, v_column_sum_4);
 +        /*
 +         * Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
 +         */
 +        uint32x2_t sum1 = vpadd_u32(vget_low_u32(v_s1), vget_high_u32(v_s1));
 +        uint32x2_t sum2 = vpadd_u32(vget_low_u32(v_s2), vget_high_u32(v_s2));
 +        uint32x2_t s1s2 = vpadd_u32(sum1, sum2);
 +
 +        s1 += vget_lane_u32(s1s2, 0);
 +        s2 += vget_lane_u32(s1s2, 1);
 +        /*
 +         * Reduce.
 +         */
 +        s1 %= BASE;
 +        s2 %= BASE;
 +    }
 +    return leftover_handler(s1, s2, buf, len);
 +
 +}
 +#endif
 +
 uLong ZEXPORT adler32_z(adler, buf, len)
     uLong adler;
     const Bytef *buf;
@@ -92,6 +254,11 @@ uLong ZEXPORT adler32_z(adler, buf, len)
     unsigned long sum2;
     unsigned n;
 +#if defined(ADLER32_SIMD_NEON)
 +    if (buf && len >= 64)
 +        return adler32_simd_(adler, buf, len);
 +#endif
 +
     /* split Adler-32 into component sums */
     sum2 = (adler >> 16) & 0xffff;
     adler &= 0xffff;
 --- zlib-1.2.11/CMakeLists.txt  2020-08-04 14:35:44.023579477 +0800
 +++ CMakeLists.txt      2020-08-04 14:39:38.937798725 +0800
@@ -145,6 +145,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
             contrib/arm/arm_longest_match.h)
         set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
         add_definitions(-DARM_NEON)
 +        add_definitions(-DHASH_ARMV8_CRC32 -march=armv8-a+crc -DUNALIGNED_OK -DADLER32_SIMD_NEON -DINFLATE_CHUNK_SIMD_NEON -O3)
         set(COMPILER ${CMAKE_C_COMPILER})
         # NEON is mandatory in ARMv8.
         if(${COMPILER} MATCHES "aarch64")
--- a/zlib-1.2.11.tar.xz
+++ b/zlib-1.2.11.tar.xz
--- a/zlib-1.2.13.tar.xz
+++ b/zlib-1.2.13.tar.xz
--- a/0004-zlib-Optimize-CRC32.patch
+++ b/0004-zlib-Optimize-CRC32.patch
@ -6,32 +6,33 @@ Subject: [PATCH] zlib: Optimize CRC32
 This patch uses the NEON instruction set to optimize the CRC32
 algorithm.
-On the ARM architecture, we can optimize the efficiency of 
+On the ARM architecture, we can optimize the efficiency of
 crc32 through the interface provided by the neon instruction
 set.
 Modify by Li Qiang.
 ---
- crc32.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
+ crc32.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 47 insertions(+)
+ 1 file changed, 50 insertions(+)
 diff --git a/crc32.c b/crc32.c
-index 9580440..79ebdbd 100644
+index f8357b0..5c53068 100644
 --- a/crc32.c
 +++ b/crc32.c
-@@ -29,6 +29,9 @@
+@@ -28,6 +28,9 @@
 #endif /* MAKECRCH */
- #include "zutil.h"      /* for STDC and FAR definitions */
+ #include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
 +#ifdef __aarch64__
 +#include "arm_acle.h"
 +#endif
- /* Definitions for doing the crc four data bytes at a time. */
+  /*
- #if !defined(NOBYFOUR) && defined(Z_U4)
+   A CRC of a message is computed on N braids of words in the message, where
-@@ -194,6 +197,47 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
+@@ -600,6 +603,49 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
     return (const z_crc_t FAR *)crc_table;
 }
- 
+
 +#ifdef __aarch64__
 +ulg crc32_neon(crc, buf, len)
 +    unsigned long crc;
 +    const unsigned char FAR *buf;
@ -72,20 +73,22 @@ index 9580440..79ebdbd 100644
 +
 +    return (crc_result ^ 0xffffffffL);
 +}
 +#endif
 +
- /* ========================================================================= */
+ /* =========================================================================
- #define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
+  * Use ARM machine instructions if available. This will compute the CRC about
- #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
+  * ten times faster than the braided calculation. This code does not check for
-@@ -204,6 +248,9 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
+@@ -750,6 +794,10 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
     const unsigned char FAR *buf;
     z_size_t len;
 {
 +    #ifdef __aarch64__
 +    return crc32_neon(crc, buf, len);
 +    #endif
-     if (buf == Z_NULL) return 0UL;
+
     /* Return initial CRC, if requested. */
     if (buf == Z_NULL) return 0;
 #ifdef DYNAMIC_CRC_TABLE
 -- 
-1.8.3.1
+2.27.0
--- a/zlib.spec
+++ b/zlib.spec
@ -1,26 +1,17 @@
 Name:             zlib
-Version:          1.2.11
+Version:          1.2.13
-Release:          24
+Release:          1
 Summary:          A lossless data-compression library
 License:          zlib and Boost
 URL:              http://www.zlib.net
 Source0:          http://www.zlib.net/zlib-%{version}.tar.xz
 # Patch0 get from fedora
-Patch0:           zlib-1.2.5-minizip-fixuncrypt.patch
+Patch6000:	backport-zlib-1.2.5-minizip-fixuncrypt.patch
-# Patch1 to Patch3 get from http://www.gildor.org/en/projects/zlib
+Patch6001:	backport-fix-undefined-buffer-detected-by-oss-fuzz.patch
 Patch1:           0001-Neon-Optimized-hash-chain-rebase.patch
 Patch2:           0002-Porting-optimized-longest_match.patch
 Patch3:           0003-arm64-specific-build-patch.patch
 Patch4:           0004-zlib-Optimize-CRC32.patch
 Patch5:           zlib-1.2.11-SIMD.patch
 Patch6:           0005-Accelerate-Adler32-using-arm64-SVE-instructions.patch
-Patch6000:        fix-undefined-buffer-detected-by-oss-fuzz.patch
+Patch9000:	zlib-Optimize-CRC32.patch
-Patch6001:        backport-0001-CVE-2018-25032.patch
+Patch9001:	zlib-1.2.11-SIMD.patch
 Patch6002:        backport-0002-CVE-2018-25032.patch
 Patch6003:        backport-0001-CVE-2022-37434.patch
 Patch6004:        backport-0002-CVE-2022-37434.patch
 BuildRequires:    automake, autoconf, libtool
@ -64,25 +55,11 @@ This package contains the development-related content related to minizip.
 %prep
 %setup -n %{name}-%{version}
-%patch0 -p1
+%autosetup -b 0 -n %{name}-%{version} -p1
 %ifarch aarch64
 %patch1 -p1
 %patch2 -p1
 %patch3 -p1
 %patch4 -p1
 %patch5 -p1
 %patch6 -p1
 %endif
 %patch6000 -p1
 %patch6001 -p1
 %patch6002 -p1
 %patch6003 -p1
 %patch6004 -p1
 %build
 export CFLAGS="$RPM_OPT_FLAGS"
 %ifarch aarch64
 CFLAGS+=" -DARM_NEON -O3"
 CFLAGS+=" -march=armv8-a+crc"
 %endif
@ -135,6 +112,10 @@ make test
 %{_libdir}/pkgconfig/minizip.pc
 %changelog
 * Thu Dec 29 2022 zhoupengcheng <zhoupengcheng11@huawei.com> - 1.2.13-1
 - update to zlib-1.2.13
 - remove openEuler uncompiled  patch : 0005-Accelerate-Adler32-using-arm64-SVE-instructions.patch 
 * Mon Dec 26 2022 zhoupengcheng <zhoupengcheng11@huawei.com> - 1.2.11-24
 - DESC:remove unapplied patches