gzip/gzip-l-now-outputs-accurate-size.patch
weiwei_tiantian 65feba82ae backport fix patches from upstream and fix build failure in OBS
(cherry picked from commit 43725f660564fa21667a106f5aae9e35738a0e34)
2022-04-02 10:57:27 +08:00

416 lines
13 KiB
Diff
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From cf26200380585019e927fe3cf5c0ecb7c8b3ef14 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Wed, 1 Dec 2021 15:38:02 -0800
Subject: [PATCH] gzip: gzip -l now outputs accurate size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gzip -l now decompresses to see how long the uncompressed file was.
This fixes what is by far the most common bug report for gzip.
It has a significant performance cost, but it's worth it nowadays.
* gzip.c (main): -l now sets 'test' too. All uses of
'test' changed.
(treat_stdin, treat_file): Call do_list after decompressing,
so that the length is known.
(do_list): Omit arg IFD, since it is no longer needed.
All callers changed. Get the CRC and uncompressed size
from input_crc and bytes_out instead of using lseek.
* tests/list-big: New test.
* unzip.c (unzip): Set unzip_crc before returning.
* util.c (write_buf): If 'test', output nothing.
Update bytes_out with output byte count, regardless of 'test'.
All callers changed.
---
gzip.c | 66 ++++++++++++++++++-----------------------------
gzip.h | 1 +
tests/Makefile.am | 1 +
tests/list-big | 31 ++++++++++++++++++++++
unlzh.c | 5 ++--
unlzw.c | 17 ++++--------
unzip.c | 3 +++
util.c | 18 ++++++-------
8 files changed, 76 insertions(+), 66 deletions(-)
create mode 100755 tests/list-big
diff --git a/gzip.c b/gzip.c
index 735ee0a..ecb19da 100644
--- a/gzip.c
+++ b/gzip.c
@@ -319,7 +319,7 @@ local void discard_input_bytes (size_t nbytes, unsigned int flags);
local int make_ofname (void);
local void shorten_name (char *name);
local int get_method (int in);
-local void do_list (int ifd, int method);
+local void do_list (int method);
local int check_ofname (void);
local void copy_stat (struct stat *ifstat);
local void install_signal_handlers (void);
@@ -535,7 +535,7 @@ int main (int argc, char **argv)
case 'k':
keep = 1; break;
case 'l':
- list = decompress = to_stdout = 1; break;
+ list = decompress = test = to_stdout = 1; break;
case 'L':
license (); finish_out (); break;
case 'm': /* undocumented, may change later */
@@ -655,7 +655,7 @@ int main (int argc, char **argv)
/* And get to work */
if (file_count != 0) {
- if (to_stdout && !test && !list && (!decompress || !ascii)) {
+ if (to_stdout && !test && (!decompress || !ascii)) {
SET_BINARY_MODE (STDOUT_FILENO);
}
while (optind < argc) {
@@ -673,7 +673,7 @@ int main (int argc, char **argv)
{
/* Output any totals, and check for output errors. */
if (!quiet && 1 < file_count)
- do_list (-1, -1);
+ do_list (-1);
if (fflush (stdout) != 0)
write_error ();
}
@@ -759,7 +759,7 @@ local void treat_stdin()
if (decompress || !ascii) {
SET_BINARY_MODE (STDIN_FILENO);
}
- if (!test && !list && (!decompress || !ascii)) {
+ if (!test && (!decompress || !ascii)) {
SET_BINARY_MODE (STDOUT_FILENO);
}
strcpy(ifname, "stdin");
@@ -786,10 +786,6 @@ local void treat_stdin()
do_exit(exit_code); /* error message already emitted */
}
}
- if (list) {
- do_list(ifd, method);
- return;
- }
/* Actually do the compression/decompression. Loop over zipped members.
*/
@@ -805,6 +801,12 @@ local void treat_stdin()
bytes_out = 0; /* required for length check */
}
+ if (list)
+ {
+ do_list (method);
+ return;
+ }
+
if (verbose) {
if (test) {
fprintf(stderr, " OK\n");
@@ -949,7 +951,7 @@ local void treat_file(iname)
/* Generate output file name. For -r and (-t or -l), skip files
* without a valid gzip suffix (check done in make_ofname).
*/
- if (to_stdout && !list && !test) {
+ if (to_stdout && !test) {
strcpy(ofname, "stdout");
} else if (make_ofname() != OK) {
@@ -967,12 +969,6 @@ local void treat_file(iname)
return; /* error message already emitted */
}
}
- if (list) {
- do_list(ifd, method);
- if (close (ifd) != 0)
- read_error ();
- return;
- }
/* If compressing to a file, check if ofname is not ambiguous
* because the operating system truncates names. Otherwise, generate
@@ -992,7 +988,7 @@ local void treat_file(iname)
/* Keep the name even if not truncated except with --no-name: */
if (!save_orig_name) save_orig_name = !no_name;
- if (verbose) {
+ if (verbose && !list) {
fprintf(stderr, "%s:\t", ifname);
}
@@ -1015,6 +1011,12 @@ local void treat_file(iname)
if (close (ifd) != 0)
read_error ();
+ if (list)
+ {
+ do_list (method);
+ return;
+ }
+
if (!to_stdout)
{
copy_stat (&istat);
@@ -1066,7 +1068,7 @@ local void treat_file(iname)
} else {
display_ratio(bytes_in-(bytes_out-header_bytes), bytes_in, stderr);
}
- if (!test && !to_stdout)
+ if (!test)
fprintf(stderr, " -- %s %s", keep ? "created" : "replaced with",
ofname);
fprintf(stderr, "\n");
@@ -1395,7 +1397,8 @@ local int make_ofname()
/* With -t or -l, try all files (even without .gz suffix)
* except with -r (behave as with just -dr).
*/
- if (!recursive && (list || test)) return OK;
+ if (!recursive && test)
+ return OK;
/* Avoid annoying messages with -r */
if (verbose || (!recursive && !quiet)) {
@@ -1688,7 +1691,6 @@ local int get_method(in)
last_member = 1;
if (imagic0 != EOF) {
write_buf (STDOUT_FILENO, magic, 1);
- bytes_out++;
}
}
if (method >= 0) return method;
@@ -1724,9 +1726,8 @@ local int get_method(in)
* If the given method is < 0, display the accumulated totals.
* IN assertions: time_stamp, header_bytes and ifile_size are initialized.
*/
-local void do_list(ifd, method)
- int ifd; /* input file descriptor */
- int method; /* compression method */
+static void
+do_list (int method)
{
ulg crc; /* original crc */
static int first_time = 1;
@@ -1768,26 +1769,9 @@ local void do_list(ifd, method)
return;
}
crc = (ulg)~0; /* unknown */
- bytes_out = -1L;
- bytes_in = ifile_size;
if (method == DEFLATED && !last_member) {
- /* Get the crc and uncompressed size for gzip'ed (not zip'ed) files.
- * If the lseek fails, we could use read() to get to the end, but
- * --list is used to get quick results.
- * Use "gunzip < foo.gz | wc -c" to get the uncompressed size if
- * you are not concerned about speed.
- */
- bytes_in = lseek(ifd, (off_t)(-8), SEEK_END);
- if (bytes_in != -1L) {
- uch buf[8];
- bytes_in += 8L;
- if (read(ifd, (char*)buf, sizeof(buf)) != sizeof(buf)) {
- read_error();
- }
- crc = LG(buf);
- bytes_out = LG(buf+4);
- }
+ crc = unzip_crc;
}
if (verbose)
diff --git a/gzip.h b/gzip.h
index db0305f..ebe3213 100644
--- a/gzip.h
+++ b/gzip.h
@@ -262,6 +262,7 @@ extern int zip (int in, int out);
extern int file_read (char *buf, unsigned size);
/* in unzip.c */
+extern ulg unzip_crc;
extern int unzip (int in, int out);
extern int check_zipfile (int in);
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 256bbf7..18e7c8a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -21,6 +21,7 @@ TESTS = \
hufts \
keep \
list \
+ list-big \
memcpy-abuse \
mixed \
null-suffix-clobber \
diff --git a/tests/list-big b/tests/list-big
new file mode 100755
index 0000000..afa3310
--- /dev/null
+++ b/tests/list-big
@@ -0,0 +1,31 @@
+#!/bin/sh
+# Exercise the --list option with a big file.
+
+# Copyright 2021 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+# limit so don't run it by default.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ..
+
+truncate --size 4G big || framework_failure_
+
+gzip -1 big || fail=1
+gzip -l big.gz >out || fail=1
+case $(cat out) in
+ *' 4294967296 '*' big') ;;
+ *) cat out; fail=1;;
+esac
+
+Exit $fail
diff --git a/unlzh.c b/unlzh.c
index 37084fe..f018922 100644
--- a/unlzh.c
+++ b/unlzh.c
@@ -390,9 +390,8 @@ int unlzh(in, out)
decode_start();
while (!done) {
n = decode((unsigned) DICSIZ, window);
- if (!test && n > 0) {
- write_buf(out, (char*)window, n);
- }
+ if (n > 0)
+ write_buf (out, window, n);
}
return OK;
}
diff --git a/unlzw.c b/unlzw.c
index d7714b5..ba824e4 100644
--- a/unlzw.c
+++ b/unlzw.c
@@ -225,10 +225,8 @@ int unlzw(in, out)
"posbits:%ld inbuf:%02X %02X %02X %02X %02X\n",
posbits, p[-1],p[0],p[1],p[2],p[3]);
#endif
- if (!test && outpos > 0) {
- write_buf(out, (char*)outbuf, outpos);
- bytes_out += (off_t)outpos;
- }
+ if (outpos > 0)
+ write_buf (out, outbuf, outpos);
gzip_error (to_stdout
? "corrupt input."
: "corrupt input. Use zcat to recover some data.");
@@ -257,10 +255,7 @@ int unlzw(in, out)
outpos += i;
}
if (outpos >= OUTBUFSIZ) {
- if (!test) {
- write_buf(out, (char*)outbuf, outpos);
- bytes_out += (off_t)outpos;
- }
+ write_buf (out, outbuf, outpos);
outpos = 0;
}
stackp+= i;
@@ -281,9 +276,7 @@ int unlzw(in, out)
}
} while (rsize != 0);
- if (!test && outpos > 0) {
- write_buf(out, (char*)outbuf, outpos);
- bytes_out += (off_t)outpos;
- }
+ if (outpos > 0)
+ write_buf (out, outbuf, outpos);
return OK;
}
diff --git a/unzip.c b/unzip.c
index dacfbaf..b52811e 100644
--- a/unzip.c
+++ b/unzip.c
@@ -51,6 +51,8 @@
/* Globals */
+ulg unzip_crc; /* CRC found by 'unzip'. */
+
static int decrypt; /* flag to turn on decryption */
static int pkzip = 0; /* set for a pkzip file */
static int ext_header = 0; /* set if extended local header */
@@ -210,6 +212,7 @@ int unzip(in, out)
}
}
ext_header = pkzip = 0; /* for next file */
+ unzip_crc = orig_crc;
if (err == OK) return OK;
exit_code = ERROR;
if (!test) abort_gzip();
diff --git a/util.c b/util.c
index 4e73036..cd43886 100644
--- a/util.c
+++ b/util.c
@@ -112,7 +112,6 @@ int copy(in, out)
errno = 0;
while (insize > inptr) {
write_buf(out, (char*)inbuf + inptr, insize - inptr);
- bytes_out += insize - inptr;
got = read_buffer (in, (char *) inbuf, INBUFSIZ);
if (got == -1)
read_error();
@@ -255,9 +254,7 @@ void flush_outbuf()
{
if (outcnt == 0) return;
- if (!test)
- write_buf (ofd, outbuf, outcnt);
- bytes_out += (off_t)outcnt;
+ write_buf (ofd, outbuf, outcnt);
outcnt = 0;
}
@@ -270,16 +267,13 @@ void flush_window()
if (outcnt == 0) return;
updcrc(window, outcnt);
- if (!test) {
- write_buf(ofd, (char *)window, outcnt);
- }
- bytes_out += (off_t)outcnt;
+ write_buf (ofd, window, outcnt);
outcnt = 0;
}
/* ===========================================================================
- * Does the same as write(), but also handles partial pipe writes and checks
- * for error return.
+ * Update the count of output bytes. If testing, do not do any
+ * output. Otherwise, write the buffer, checking for errors.
*/
void write_buf(fd, buf, cnt)
int fd;
@@ -288,6 +282,10 @@ void write_buf(fd, buf, cnt)
{
unsigned n;
+ bytes_out += cnt;
+ if (test)
+ return;
+
while ((n = write_buffer (fd, buf, cnt)) != cnt) {
if (n == (unsigned)(-1)) {
write_error();
--
2.27.0