wget/Save-original-data-to-WARC-file.patch

143 lines
4.2 KiB
Diff
Raw Normal View History

2019-09-30 11:19:50 -04:00
From ad261f41ceeb59242a096b31854038c3eff65c8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20R=C3=BChsen?= <tim.ruehsen@gmx.de>
Date: Tue, 29 May 2018 10:49:24 +0200
Subject: [PATCH 11/83] Save original data to WARC file

* src/retr.c (write_data): Cleanup,
(fd_read_body): Write to WARC before uncompressing

Fixes: #53968
---
src/retr.c | 68 +++++++++++++++++++++++++++++++++---------------------
1 file changed, 42 insertions(+), 26 deletions(-)
diff --git a/src/retr.c b/src/retr.c
index 17ed228b..ae86730c 100644
--- a/src/retr.c
+++ b/src/retr.c
@@ -159,8 +159,8 @@ limit_bandwidth (wgint bytes, struct ptimer *timer)
/* Write data in BUF to OUT. However, if *SKIP is non-zero, skip that
amount of data and decrease SKIP. Increment *TOTAL by the amount
of data written. If OUT2 is not NULL, also write BUF to OUT2.
- In case of error writing to OUT, -1 is returned. In case of error
- writing to OUT2, -2 is returned. Return 1 if the whole BUF was
+ In case of error writing to OUT, -2 is returned. In case of error
+ writing to OUT2, -3 is returned. Return 1 if the whole BUF was
skipped. */
static int
@@ -169,25 +169,31 @@ write_data (FILE *out, FILE *out2, const char *buf, int bufsize,
{
if (out == NULL && out2 == NULL)
return 1;
- if (*skip > bufsize)
- {
- *skip -= bufsize;
- return 1;
- }
- if (*skip)
+
+ if (skip)
{
- buf += *skip;
- bufsize -= *skip;
- *skip = 0;
- if (bufsize == 0)
- return 1;
+ if (*skip > bufsize)
+ {
+ *skip -= bufsize;
+ return 1;
+ }
+ if (*skip)
+ {
+ buf += *skip;
+ bufsize -= *skip;
+ *skip = 0;
+ if (bufsize == 0)
+ return 1;
+ }
}
- if (out != NULL)
+ if (out)
fwrite (buf, 1, bufsize, out);
- if (out2 != NULL)
+ if (out2)
fwrite (buf, 1, bufsize, out2);
- *written += bufsize;
+
+ if (written)
+ *written += bufsize;
/* Immediately flush the downloaded data. This should not hinder
performance: fast downloads will arrive in large 16K chunks
@@ -203,17 +209,18 @@ write_data (FILE *out, FILE *out2, const char *buf, int bufsize,
actual justification. (Also, why 16K? Anyone test other values?)
*/
#ifndef __VMS
- if (out != NULL)
+ if (out)
fflush (out);
- if (out2 != NULL)
+ if (out2)
fflush (out2);
#endif /* ndef __VMS */
- if (out != NULL && ferror (out))
- return -1;
- else if (out2 != NULL && ferror (out2))
+
+ if (out && ferror (out))
return -2;
- else
- return 0;
+ else if (out2 && ferror (out2))
+ return -3;
+
+ return 0;
}
/* Read the contents of file descriptor FD until it the connection
@@ -452,6 +459,15 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
{
int err;
int towrite;
+
+ /* Write original data to WARC file */
+ write_res = write_data (NULL, out2, dlbuf, ret, NULL, NULL);
+ if (write_res < 0)
+ {
+ ret = write_res;
+ goto out;
+ }
+
gzstream.avail_in = ret;
gzstream.next_in = (unsigned char *) dlbuf;
@@ -482,11 +498,11 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
}
towrite = gzbufsize - gzstream.avail_out;
- write_res = write_data (out, out2, gzbuf, towrite, &skip,
+ write_res = write_data (out, NULL, gzbuf, towrite, &skip,
&sum_written);
if (write_res < 0)
{
- ret = (write_res == -3) ? -3 : -2;
+ ret = write_res;
goto out;
}
}
@@ -499,7 +515,7 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
&sum_written);
if (write_res < 0)
{
- ret = (write_res == -3) ? -3 : -2;
+ ret = write_res;
goto out;
}
}
--
2.19.1