416 lines
13 KiB
Diff
416 lines
13 KiB
Diff
From cf26200380585019e927fe3cf5c0ecb7c8b3ef14 Mon Sep 17 00:00:00 2001
|
||
From: Paul Eggert <eggert@cs.ucla.edu>
|
||
Date: Wed, 1 Dec 2021 15:38:02 -0800
|
||
Subject: [PATCH] gzip: gzip -l now outputs accurate size
|
||
MIME-Version: 1.0
|
||
Content-Type: text/plain; charset=UTF-8
|
||
Content-Transfer-Encoding: 8bit
|
||
|
||
gzip -l now decompresses to see how long the uncompressed file was.
|
||
This fixes what is by far the most common bug report for gzip.
|
||
It has a significant performance cost, but it’s worth it nowadays.
|
||
* gzip.c (main): -l now sets 'test' too. All uses of
|
||
'test' changed.
|
||
(treat_stdin, treat_file): Call do_list after decompressing,
|
||
so that the length is known.
|
||
(do_list): Omit arg IFD, since it is no longer needed.
|
||
All callers changed. Get the CRC and uncompressed size
|
||
from input_crc and bytes_out instead of using lseek.
|
||
* tests/list-big: New test.
|
||
* unzip.c (unzip): Set unzip_crc before returning.
|
||
* util.c (write_buf): If 'test', output nothing.
|
||
Update bytes_out with output byte count, regardless of 'test'.
|
||
All callers changed.
|
||
---
|
||
gzip.c | 66 ++++++++++++++++++-----------------------------
|
||
gzip.h | 1 +
|
||
tests/Makefile.am | 1 +
|
||
tests/list-big | 31 ++++++++++++++++++++++
|
||
unlzh.c | 5 ++--
|
||
unlzw.c | 17 ++++--------
|
||
unzip.c | 3 +++
|
||
util.c | 18 ++++++-------
|
||
8 files changed, 76 insertions(+), 66 deletions(-)
|
||
create mode 100755 tests/list-big
|
||
|
||
diff --git a/gzip.c b/gzip.c
|
||
index 735ee0a..ecb19da 100644
|
||
--- a/gzip.c
|
||
+++ b/gzip.c
|
||
@@ -319,7 +319,7 @@ local void discard_input_bytes (size_t nbytes, unsigned int flags);
|
||
local int make_ofname (void);
|
||
local void shorten_name (char *name);
|
||
local int get_method (int in);
|
||
-local void do_list (int ifd, int method);
|
||
+local void do_list (int method);
|
||
local int check_ofname (void);
|
||
local void copy_stat (struct stat *ifstat);
|
||
local void install_signal_handlers (void);
|
||
@@ -535,7 +535,7 @@ int main (int argc, char **argv)
|
||
case 'k':
|
||
keep = 1; break;
|
||
case 'l':
|
||
- list = decompress = to_stdout = 1; break;
|
||
+ list = decompress = test = to_stdout = 1; break;
|
||
case 'L':
|
||
license (); finish_out (); break;
|
||
case 'm': /* undocumented, may change later */
|
||
@@ -655,7 +655,7 @@ int main (int argc, char **argv)
|
||
|
||
/* And get to work */
|
||
if (file_count != 0) {
|
||
- if (to_stdout && !test && !list && (!decompress || !ascii)) {
|
||
+ if (to_stdout && !test && (!decompress || !ascii)) {
|
||
SET_BINARY_MODE (STDOUT_FILENO);
|
||
}
|
||
while (optind < argc) {
|
||
@@ -673,7 +673,7 @@ int main (int argc, char **argv)
|
||
{
|
||
/* Output any totals, and check for output errors. */
|
||
if (!quiet && 1 < file_count)
|
||
- do_list (-1, -1);
|
||
+ do_list (-1);
|
||
if (fflush (stdout) != 0)
|
||
write_error ();
|
||
}
|
||
@@ -759,7 +759,7 @@ local void treat_stdin()
|
||
if (decompress || !ascii) {
|
||
SET_BINARY_MODE (STDIN_FILENO);
|
||
}
|
||
- if (!test && !list && (!decompress || !ascii)) {
|
||
+ if (!test && (!decompress || !ascii)) {
|
||
SET_BINARY_MODE (STDOUT_FILENO);
|
||
}
|
||
strcpy(ifname, "stdin");
|
||
@@ -786,10 +786,6 @@ local void treat_stdin()
|
||
do_exit(exit_code); /* error message already emitted */
|
||
}
|
||
}
|
||
- if (list) {
|
||
- do_list(ifd, method);
|
||
- return;
|
||
- }
|
||
|
||
/* Actually do the compression/decompression. Loop over zipped members.
|
||
*/
|
||
@@ -805,6 +801,12 @@ local void treat_stdin()
|
||
bytes_out = 0; /* required for length check */
|
||
}
|
||
|
||
+ if (list)
|
||
+ {
|
||
+ do_list (method);
|
||
+ return;
|
||
+ }
|
||
+
|
||
if (verbose) {
|
||
if (test) {
|
||
fprintf(stderr, " OK\n");
|
||
@@ -949,7 +951,7 @@ local void treat_file(iname)
|
||
/* Generate output file name. For -r and (-t or -l), skip files
|
||
* without a valid gzip suffix (check done in make_ofname).
|
||
*/
|
||
- if (to_stdout && !list && !test) {
|
||
+ if (to_stdout && !test) {
|
||
strcpy(ofname, "stdout");
|
||
|
||
} else if (make_ofname() != OK) {
|
||
@@ -967,12 +969,6 @@ local void treat_file(iname)
|
||
return; /* error message already emitted */
|
||
}
|
||
}
|
||
- if (list) {
|
||
- do_list(ifd, method);
|
||
- if (close (ifd) != 0)
|
||
- read_error ();
|
||
- return;
|
||
- }
|
||
|
||
/* If compressing to a file, check if ofname is not ambiguous
|
||
* because the operating system truncates names. Otherwise, generate
|
||
@@ -992,7 +988,7 @@ local void treat_file(iname)
|
||
/* Keep the name even if not truncated except with --no-name: */
|
||
if (!save_orig_name) save_orig_name = !no_name;
|
||
|
||
- if (verbose) {
|
||
+ if (verbose && !list) {
|
||
fprintf(stderr, "%s:\t", ifname);
|
||
}
|
||
|
||
@@ -1015,6 +1011,12 @@ local void treat_file(iname)
|
||
if (close (ifd) != 0)
|
||
read_error ();
|
||
|
||
+ if (list)
|
||
+ {
|
||
+ do_list (method);
|
||
+ return;
|
||
+ }
|
||
+
|
||
if (!to_stdout)
|
||
{
|
||
copy_stat (&istat);
|
||
@@ -1066,7 +1068,7 @@ local void treat_file(iname)
|
||
} else {
|
||
display_ratio(bytes_in-(bytes_out-header_bytes), bytes_in, stderr);
|
||
}
|
||
- if (!test && !to_stdout)
|
||
+ if (!test)
|
||
fprintf(stderr, " -- %s %s", keep ? "created" : "replaced with",
|
||
ofname);
|
||
fprintf(stderr, "\n");
|
||
@@ -1395,7 +1397,8 @@ local int make_ofname()
|
||
/* With -t or -l, try all files (even without .gz suffix)
|
||
* except with -r (behave as with just -dr).
|
||
*/
|
||
- if (!recursive && (list || test)) return OK;
|
||
+ if (!recursive && test)
|
||
+ return OK;
|
||
|
||
/* Avoid annoying messages with -r */
|
||
if (verbose || (!recursive && !quiet)) {
|
||
@@ -1688,7 +1691,6 @@ local int get_method(in)
|
||
last_member = 1;
|
||
if (imagic0 != EOF) {
|
||
write_buf (STDOUT_FILENO, magic, 1);
|
||
- bytes_out++;
|
||
}
|
||
}
|
||
if (method >= 0) return method;
|
||
@@ -1724,9 +1726,8 @@ local int get_method(in)
|
||
* If the given method is < 0, display the accumulated totals.
|
||
* IN assertions: time_stamp, header_bytes and ifile_size are initialized.
|
||
*/
|
||
-local void do_list(ifd, method)
|
||
- int ifd; /* input file descriptor */
|
||
- int method; /* compression method */
|
||
+static void
|
||
+do_list (int method)
|
||
{
|
||
ulg crc; /* original crc */
|
||
static int first_time = 1;
|
||
@@ -1768,26 +1769,9 @@ local void do_list(ifd, method)
|
||
return;
|
||
}
|
||
crc = (ulg)~0; /* unknown */
|
||
- bytes_out = -1L;
|
||
- bytes_in = ifile_size;
|
||
|
||
if (method == DEFLATED && !last_member) {
|
||
- /* Get the crc and uncompressed size for gzip'ed (not zip'ed) files.
|
||
- * If the lseek fails, we could use read() to get to the end, but
|
||
- * --list is used to get quick results.
|
||
- * Use "gunzip < foo.gz | wc -c" to get the uncompressed size if
|
||
- * you are not concerned about speed.
|
||
- */
|
||
- bytes_in = lseek(ifd, (off_t)(-8), SEEK_END);
|
||
- if (bytes_in != -1L) {
|
||
- uch buf[8];
|
||
- bytes_in += 8L;
|
||
- if (read(ifd, (char*)buf, sizeof(buf)) != sizeof(buf)) {
|
||
- read_error();
|
||
- }
|
||
- crc = LG(buf);
|
||
- bytes_out = LG(buf+4);
|
||
- }
|
||
+ crc = unzip_crc;
|
||
}
|
||
|
||
if (verbose)
|
||
diff --git a/gzip.h b/gzip.h
|
||
index db0305f..ebe3213 100644
|
||
--- a/gzip.h
|
||
+++ b/gzip.h
|
||
@@ -262,6 +262,7 @@ extern int zip (int in, int out);
|
||
extern int file_read (char *buf, unsigned size);
|
||
|
||
/* in unzip.c */
|
||
+extern ulg unzip_crc;
|
||
extern int unzip (int in, int out);
|
||
extern int check_zipfile (int in);
|
||
|
||
diff --git a/tests/Makefile.am b/tests/Makefile.am
|
||
index 256bbf7..18e7c8a 100644
|
||
--- a/tests/Makefile.am
|
||
+++ b/tests/Makefile.am
|
||
@@ -21,6 +21,7 @@ TESTS = \
|
||
hufts \
|
||
keep \
|
||
list \
|
||
+ list-big \
|
||
memcpy-abuse \
|
||
mixed \
|
||
null-suffix-clobber \
|
||
diff --git a/tests/list-big b/tests/list-big
|
||
new file mode 100755
|
||
index 0000000..afa3310
|
||
--- /dev/null
|
||
+++ b/tests/list-big
|
||
@@ -0,0 +1,31 @@
|
||
+#!/bin/sh
|
||
+# Exercise the --list option with a big file.
|
||
+
|
||
+# Copyright 2021 Free Software Foundation, Inc.
|
||
+
|
||
+# This program is free software: you can redistribute it and/or modify
|
||
+# it under the terms of the GNU General Public License as published by
|
||
+# the Free Software Foundation, either version 3 of the License, or
|
||
+# (at your option) any later version.
|
||
+
|
||
+# This program is distributed in the hope that it will be useful,
|
||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+# GNU General Public License for more details.
|
||
+
|
||
+# You should have received a copy of the GNU General Public License
|
||
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||
+# Note: this test needs 'truncate' and creates a 4 GiB file named 'big'.
|
||
+
|
||
+. "${srcdir=.}/init.sh"; path_prepend_ ..
|
||
+
|
||
+truncate --size 4G big || framework_failure_
|
||
+
|
||
+gzip -1 big || fail=1
|
||
+gzip -l big.gz >out || fail=1
|
||
+case $(cat out) in
|
||
+ *' 4294967296 '*' big') ;;
|
||
+ *) cat out; fail=1;;
|
||
+esac
|
||
+
|
||
+Exit $fail
|
||
diff --git a/unlzh.c b/unlzh.c
|
||
index 37084fe..f018922 100644
|
||
--- a/unlzh.c
|
||
+++ b/unlzh.c
|
||
@@ -390,9 +390,8 @@ int unlzh(in, out)
|
||
decode_start();
|
||
while (!done) {
|
||
n = decode((unsigned) DICSIZ, window);
|
||
- if (!test && n > 0) {
|
||
- write_buf(out, (char*)window, n);
|
||
- }
|
||
+ if (n > 0)
|
||
+ write_buf (out, window, n);
|
||
}
|
||
return OK;
|
||
}
|
||
diff --git a/unlzw.c b/unlzw.c
|
||
index d7714b5..ba824e4 100644
|
||
--- a/unlzw.c
|
||
+++ b/unlzw.c
|
||
@@ -225,10 +225,8 @@ int unlzw(in, out)
|
||
"posbits:%ld inbuf:%02X %02X %02X %02X %02X\n",
|
||
posbits, p[-1],p[0],p[1],p[2],p[3]);
|
||
#endif
|
||
- if (!test && outpos > 0) {
|
||
- write_buf(out, (char*)outbuf, outpos);
|
||
- bytes_out += (off_t)outpos;
|
||
- }
|
||
+ if (outpos > 0)
|
||
+ write_buf (out, outbuf, outpos);
|
||
gzip_error (to_stdout
|
||
? "corrupt input."
|
||
: "corrupt input. Use zcat to recover some data.");
|
||
@@ -257,10 +255,7 @@ int unlzw(in, out)
|
||
outpos += i;
|
||
}
|
||
if (outpos >= OUTBUFSIZ) {
|
||
- if (!test) {
|
||
- write_buf(out, (char*)outbuf, outpos);
|
||
- bytes_out += (off_t)outpos;
|
||
- }
|
||
+ write_buf (out, outbuf, outpos);
|
||
outpos = 0;
|
||
}
|
||
stackp+= i;
|
||
@@ -281,9 +276,7 @@ int unlzw(in, out)
|
||
}
|
||
} while (rsize != 0);
|
||
|
||
- if (!test && outpos > 0) {
|
||
- write_buf(out, (char*)outbuf, outpos);
|
||
- bytes_out += (off_t)outpos;
|
||
- }
|
||
+ if (outpos > 0)
|
||
+ write_buf (out, outbuf, outpos);
|
||
return OK;
|
||
}
|
||
diff --git a/unzip.c b/unzip.c
|
||
index dacfbaf..b52811e 100644
|
||
--- a/unzip.c
|
||
+++ b/unzip.c
|
||
@@ -51,6 +51,8 @@
|
||
|
||
/* Globals */
|
||
|
||
+ulg unzip_crc; /* CRC found by 'unzip'. */
|
||
+
|
||
static int decrypt; /* flag to turn on decryption */
|
||
static int pkzip = 0; /* set for a pkzip file */
|
||
static int ext_header = 0; /* set if extended local header */
|
||
@@ -210,6 +212,7 @@ int unzip(in, out)
|
||
}
|
||
}
|
||
ext_header = pkzip = 0; /* for next file */
|
||
+ unzip_crc = orig_crc;
|
||
if (err == OK) return OK;
|
||
exit_code = ERROR;
|
||
if (!test) abort_gzip();
|
||
diff --git a/util.c b/util.c
|
||
index 4e73036..cd43886 100644
|
||
--- a/util.c
|
||
+++ b/util.c
|
||
@@ -112,7 +112,6 @@ int copy(in, out)
|
||
errno = 0;
|
||
while (insize > inptr) {
|
||
write_buf(out, (char*)inbuf + inptr, insize - inptr);
|
||
- bytes_out += insize - inptr;
|
||
got = read_buffer (in, (char *) inbuf, INBUFSIZ);
|
||
if (got == -1)
|
||
read_error();
|
||
@@ -255,9 +254,7 @@ void flush_outbuf()
|
||
{
|
||
if (outcnt == 0) return;
|
||
|
||
- if (!test)
|
||
- write_buf (ofd, outbuf, outcnt);
|
||
- bytes_out += (off_t)outcnt;
|
||
+ write_buf (ofd, outbuf, outcnt);
|
||
outcnt = 0;
|
||
}
|
||
|
||
@@ -270,16 +267,13 @@ void flush_window()
|
||
if (outcnt == 0) return;
|
||
updcrc(window, outcnt);
|
||
|
||
- if (!test) {
|
||
- write_buf(ofd, (char *)window, outcnt);
|
||
- }
|
||
- bytes_out += (off_t)outcnt;
|
||
+ write_buf (ofd, window, outcnt);
|
||
outcnt = 0;
|
||
}
|
||
|
||
/* ===========================================================================
|
||
- * Does the same as write(), but also handles partial pipe writes and checks
|
||
- * for error return.
|
||
+ * Update the count of output bytes. If testing, do not do any
|
||
+ * output. Otherwise, write the buffer, checking for errors.
|
||
*/
|
||
void write_buf(fd, buf, cnt)
|
||
int fd;
|
||
@@ -288,6 +282,10 @@ void write_buf(fd, buf, cnt)
|
||
{
|
||
unsigned n;
|
||
|
||
+ bytes_out += cnt;
|
||
+ if (test)
|
||
+ return;
|
||
+
|
||
while ((n = write_buffer (fd, buf, cnt)) != cnt) {
|
||
if (n == (unsigned)(-1)) {
|
||
write_error();
|
||
--
|
||
2.27.0
|
||
|