gzip/gzip-l-now-outputs-accurate-size.patch

From cf26200380585019e927fe3cf5c0ecb7c8b3ef14 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Wed, 1 Dec 2021 15:38:02 -0800
Subject: [PATCH] gzip: gzip -l now outputs accurate size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gzip -l now decompresses to see how long the uncompressed file was.
This fixes what is by far the most common bug report for gzip.
It has a significant performance cost, but it’s worth it nowadays.
* gzip.c (main): -l now sets 'test' too.  All uses of
'test' changed.
(treat_stdin, treat_file): Call do_list after decompressing,
so that the length is known.
(do_list): Omit arg IFD, since it is no longer needed.
All callers changed.  Get the CRC and uncompressed size
from input_crc and bytes_out instead of using lseek.
* tests/list-big: New test.
* unzip.c (unzip): Set unzip_crc before returning.
* util.c (write_buf): If 'test', output nothing.
Update bytes_out with output byte count, regardless of 'test'.
All callers changed.
---
 gzip.c            | 66 ++++++++++++++++++-----------------------------
 gzip.h            |  1 +
 tests/Makefile.am |  1 +
 tests/list-big    | 31 ++++++++++++++++++++++
 unlzh.c           |  5 ++--
 unlzw.c           | 17 ++++--------
 unzip.c           |  3 +++
 util.c            | 18 ++++++-------
 8 files changed, 76 insertions(+), 66 deletions(-)
 create mode 100755 tests/list-big

diff --git a/gzip.c b/gzip.c
index 735ee0a..ecb19da 100644
--- a/gzip.c
+++ b/gzip.c
@@ -319,7 +319,7 @@ local void discard_input_bytes (size_t nbytes, unsigned int flags);
 local int  make_ofname  (void);
 local void shorten_name  (char *name);
 local int  get_method   (int in);
-local void do_list      (int ifd, int method);
+local void do_list      (int method);
 local int  check_ofname (void);
 local void copy_stat    (struct stat *ifstat);
 local void install_signal_handlers (void);
@@ -535,7 +535,7 @@ int main (int argc, char **argv)
         case 'k':
             keep = 1; break;
         case 'l':
-            list = decompress = to_stdout = 1; break;
+            list = decompress = test = to_stdout = 1; break;
         case 'L':
             license (); finish_out (); break;
         case 'm': /* undocumented, may change later */
@@ -655,7 +655,7 @@ int main (int argc, char **argv)

     /* And get to work */
     if (file_count != 0) {
-        if (to_stdout && !test && !list && (!decompress || !ascii)) {
+        if (to_stdout && !test && (!decompress || !ascii)) {
             SET_BINARY_MODE (STDOUT_FILENO);
         }
         while (optind < argc) {
@@ -673,7 +673,7 @@ int main (int argc, char **argv)
       {
         /* Output any totals, and check for output errors.  */
         if (!quiet && 1 < file_count)
-          do_list (-1, -1);
+          do_list (-1);
         if (fflush (stdout) != 0)
           write_error ();
       }
@@ -759,7 +759,7 @@ local void treat_stdin()
     if (decompress || !ascii) {
       SET_BINARY_MODE (STDIN_FILENO);
     }
-    if (!test && !list && (!decompress || !ascii)) {
+    if (!test && (!decompress || !ascii)) {
       SET_BINARY_MODE (STDOUT_FILENO);
     }
     strcpy(ifname, "stdin");
@@ -786,10 +786,6 @@ local void treat_stdin()
             do_exit(exit_code); /* error message already emitted */
         }
     }
-    if (list) {
-        do_list(ifd, method);
-        return;
-    }

     /* Actually do the compression/decompression. Loop over zipped members.
      */
@@ -805,6 +801,12 @@ local void treat_stdin()
         bytes_out = 0;            /* required for length check */
     }

+    if (list)
+      {
+        do_list (method);
+        return;
+      }
+
     if (verbose) {
         if (test) {
             fprintf(stderr, " OK\n");
@@ -949,7 +951,7 @@ local void treat_file(iname)
     /* Generate output file name. For -r and (-t or -l), skip files
      * without a valid gzip suffix (check done in make_ofname).
      */
-    if (to_stdout && !list && !test) {
+    if (to_stdout && !test) {
         strcpy(ofname, "stdout");

     } else if (make_ofname() != OK) {
@@ -967,12 +969,6 @@ local void treat_file(iname)
             return;               /* error message already emitted */
         }
     }
-    if (list) {
-        do_list(ifd, method);
-        if (close (ifd) != 0)
-          read_error ();
-        return;
-    }

     /* If compressing to a file, check if ofname is not ambiguous
      * because the operating system truncates names. Otherwise, generate
@@ -992,7 +988,7 @@ local void treat_file(iname)
     /* Keep the name even if not truncated except with --no-name: */
     if (!save_orig_name) save_orig_name = !no_name;

-    if (verbose) {
+    if (verbose && !list) {
         fprintf(stderr, "%s:\t", ifname);
     }

@@ -1015,6 +1011,12 @@ local void treat_file(iname)
     if (close (ifd) != 0)
       read_error ();

+    if (list)
+      {
+        do_list (method);
+        return;
+      }
+
     if (!to_stdout)
       {
         copy_stat (&istat);
@@ -1066,7 +1068,7 @@ local void treat_file(iname)
         } else {
             display_ratio(bytes_in-(bytes_out-header_bytes), bytes_in, stderr);
         }
-        if (!test && !to_stdout)
+        if (!test)
           fprintf(stderr, " -- %s %s", keep ? "created" : "replaced with",
                   ofname);
         fprintf(stderr, "\n");
@@ -1395,7 +1397,8 @@ local int make_ofname()
             /* With -t or -l, try all files (even without .gz suffix)
              * except with -r (behave as with just -dr).
              */
-            if (!recursive && (list || test)) return OK;
+            if (!recursive && test)
+              return OK;

             /* Avoid annoying messages with -r */
             if (verbose || (!recursive && !quiet)) {
@@ -1688,7 +1691,6 @@ local int get_method(in)
         last_member = 1;
         if (imagic0 != EOF) {
             write_buf (STDOUT_FILENO, magic, 1);
-            bytes_out++;
         }
     }
     if (method >= 0) return method;
@@ -1724,9 +1726,8 @@ local int get_method(in)
  * If the given method is < 0, display the accumulated totals.
  * IN assertions: time_stamp, header_bytes and ifile_size are initialized.
  */
-local void do_list(ifd, method)
-    int ifd;     /* input file descriptor */
-    int method;  /* compression method */
+static void
+do_list (int method)
 {
     ulg crc;  /* original crc */
     static int first_time = 1;
@@ -1768,26 +1769,9 @@ local void do_list(ifd, method)
         return;
     }
     crc = (ulg)~0; /* unknown */
-    bytes_out = -1L;
-    bytes_in = ifile_size;

     if (method == DEFLATED && !last_member) {
-        /* Get the crc and uncompressed size for gzip'ed (not zip'ed) files.
-         * If the lseek fails, we could use read() to get to the end, but
-         * --list is used to get quick results.
-         * Use "gunzip < foo.gz | wc -c" to get the uncompressed size if
-         * you are not concerned about speed.
-         */
-        bytes_in = lseek(ifd, (off_t)(-8), SEEK_END);
-        if (bytes_in != -1L) {
-            uch buf[8];
-            bytes_in += 8L;
-            if (read(ifd, (char*)buf, sizeof(buf)) != sizeof(buf)) {
-                read_error();
-            }
-            crc       = LG(buf);
-            bytes_out = LG(buf+4);
-        }
+      crc = unzip_crc;
     }

     if (verbose)
diff --git a/gzip.h b/gzip.h
index db0305f..ebe3213 100644
--- a/gzip.h
+++ b/gzip.h
@@ -262,6 +262,7 @@ extern int zip        (int in, int out);
 extern int file_read  (char *buf,  unsigned size);

         /* in unzip.c */
+extern ulg unzip_crc;
 extern int unzip      (int in, int out);
 extern int check_zipfile (int in);

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 256bbf7..18e7c8a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -21,6 +21,7 @@ TESTS =					\
   hufts					\
   keep					\
   list					\
+  list-big				\
   memcpy-abuse				\
   mixed					\
   null-suffix-clobber			\
diff --git a/tests/list-big b/tests/list-big
new file mode 100755
index 0000000..afa3310
--- /dev/null
+++ b/tests/list-big
@@ -0,0 +1,31 @@
+#!/bin/sh
+# Exercise the --list option with a big file.
+
+# Copyright 2021 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+# Note: this test creates and compresses a 4 GiB file named 'big'.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ..
+
+truncate --size 4G big || framework_failure_
+
+gzip -1 big || fail=1
+gzip -l big.gz >out || fail=1
+case $(cat out) in
+  *' 4294967296 '*' big') ;;
+  *) cat out; fail=1;;
+esac
+
+Exit $fail
diff --git a/unlzh.c b/unlzh.c
index 37084fe..f018922 100644
--- a/unlzh.c
+++ b/unlzh.c
@@ -390,9 +390,8 @@ int unlzh(in, out)
     decode_start();
     while (!done) {
         n = decode((unsigned) DICSIZ, window);
-        if (!test && n > 0) {
-            write_buf(out, (char*)window, n);
-        }
+        if (n > 0)
+          write_buf (out, window, n);
     }
     return OK;
 }
diff --git a/unlzw.c b/unlzw.c
index d7714b5..ba824e4 100644
--- a/unlzw.c
+++ b/unlzw.c
@@ -225,10 +225,8 @@ int unlzw(in, out)
                             "posbits:%ld inbuf:%02X %02X %02X %02X %02X\n",
                             posbits, p[-1],p[0],p[1],p[2],p[3]);
 #endif
-                    if (!test && outpos > 0) {
-                        write_buf(out, (char*)outbuf, outpos);
-                        bytes_out += (off_t)outpos;
-                    }
+                    if (outpos > 0)
+                      write_buf (out, outbuf, outpos);
                     gzip_error (to_stdout
                                 ? "corrupt input."
                                 : "corrupt input. Use zcat to recover some data.");
@@ -257,10 +255,7 @@ int unlzw(in, out)
                             outpos += i;
                         }
                         if (outpos >= OUTBUFSIZ) {
-                            if (!test) {
-                                write_buf(out, (char*)outbuf, outpos);
-                                bytes_out += (off_t)outpos;
-                            }
+                            write_buf (out, outbuf, outpos);
                             outpos = 0;
                         }
                         stackp+= i;
@@ -281,9 +276,7 @@ int unlzw(in, out)
         }
     } while (rsize != 0);

-    if (!test && outpos > 0) {
-        write_buf(out, (char*)outbuf, outpos);
-        bytes_out += (off_t)outpos;
-    }
+    if (outpos > 0)
+      write_buf (out, outbuf, outpos);
     return OK;
 }
diff --git a/unzip.c b/unzip.c
index dacfbaf..b52811e 100644
--- a/unzip.c
+++ b/unzip.c
@@ -51,6 +51,8 @@

 /* Globals */

+ulg unzip_crc;  /* CRC found by 'unzip'.  */
+
 static int decrypt;        /* flag to turn on decryption */
 static int pkzip = 0;      /* set for a pkzip file */
 static int ext_header = 0; /* set if extended local header */
@@ -210,6 +212,7 @@ int unzip(in, out)
         }
     }
     ext_header = pkzip = 0; /* for next file */
+    unzip_crc = orig_crc;
     if (err == OK) return OK;
     exit_code = ERROR;
     if (!test) abort_gzip();
diff --git a/util.c b/util.c
index 4e73036..cd43886 100644
--- a/util.c
+++ b/util.c
@@ -112,7 +112,6 @@ int copy(in, out)
     errno = 0;
     while (insize > inptr) {
         write_buf(out, (char*)inbuf + inptr, insize - inptr);
-        bytes_out += insize - inptr;
         got = read_buffer (in, (char *) inbuf, INBUFSIZ);
         if (got == -1)
             read_error();
@@ -255,9 +254,7 @@ void flush_outbuf()
 {
     if (outcnt == 0) return;

-    if (!test)
-      write_buf (ofd, outbuf, outcnt);
-    bytes_out += (off_t)outcnt;
+    write_buf (ofd, outbuf, outcnt);
     outcnt = 0;
 }

@@ -270,16 +267,13 @@ void flush_window()
     if (outcnt == 0) return;
     updcrc(window, outcnt);

-    if (!test) {
-        write_buf(ofd, (char *)window, outcnt);
-    }
-    bytes_out += (off_t)outcnt;
+    write_buf (ofd, window, outcnt);
     outcnt = 0;
 }

 /* ===========================================================================
- * Does the same as write(), but also handles partial pipe writes and checks
- * for error return.
+ * Update the count of output bytes.  If testing, do not do any
+ * output.  Otherwise, write the buffer, checking for errors.
  */
 void write_buf(fd, buf, cnt)
     int       fd;
@@ -288,6 +282,10 @@ void write_buf(fd, buf, cnt)
 {
     unsigned  n;

+    bytes_out += cnt;
+    if (test)
+      return;
+
     while ((n = write_buffer (fd, buf, cnt)) != cnt) {
         if (n == (unsigned)(-1)) {
             write_error();
--
2.27.0