143 lines
4.2 KiB
Diff
143 lines
4.2 KiB
Diff
From ad261f41ceeb59242a096b31854038c3eff65c8f Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Tim=20R=C3=BChsen?= <tim.ruehsen@gmx.de>
|
|
Date: Tue, 29 May 2018 10:49:24 +0200
|
|
Subject: [PATCH 11/83] Save original data to WARC file
|
|
|
|
* src/retr.c (write_data): Clean up, accept NULL SKIP and WRITTEN, return -2/-3 (was -1/-2) on write errors to OUT/OUT2,
|
|
(fd_read_body): Write the original (not yet uncompressed) data to WARC before uncompressing; write only the uncompressed data to OUT
|
|
|
|
Fixes: #53968
|
|
---
|
|
src/retr.c | 68 +++++++++++++++++++++++++++++++++---------------------
|
|
1 file changed, 42 insertions(+), 26 deletions(-)
|
|
|
|
diff --git a/src/retr.c b/src/retr.c
|
|
index 17ed228b..ae86730c 100644
|
|
--- a/src/retr.c
|
|
+++ b/src/retr.c
|
|
@@ -159,8 +159,8 @@ limit_bandwidth (wgint bytes, struct ptimer *timer)
|
|
/* Write data in BUF to OUT. However, if *SKIP is non-zero, skip that
|
|
amount of data and decrease SKIP. Increment *TOTAL by the amount
|
|
of data written. If OUT2 is not NULL, also write BUF to OUT2.
|
|
- In case of error writing to OUT, -1 is returned. In case of error
|
|
- writing to OUT2, -2 is returned. Return 1 if the whole BUF was
|
|
+ In case of error writing to OUT, -2 is returned. In case of error
|
|
+ writing to OUT2, -3 is returned. Return 1 if the whole BUF was
|
|
skipped. */
|
|
|
|
static int
|
|
@@ -169,25 +169,31 @@ write_data (FILE *out, FILE *out2, const char *buf, int bufsize,
|
|
{
|
|
if (out == NULL && out2 == NULL)
|
|
return 1;
|
|
- if (*skip > bufsize)
|
|
- {
|
|
- *skip -= bufsize;
|
|
- return 1;
|
|
- }
|
|
- if (*skip)
|
|
+
|
|
+ if (skip)
|
|
{
|
|
- buf += *skip;
|
|
- bufsize -= *skip;
|
|
- *skip = 0;
|
|
- if (bufsize == 0)
|
|
- return 1;
|
|
+ if (*skip > bufsize)
|
|
+ {
|
|
+ *skip -= bufsize;
|
|
+ return 1;
|
|
+ }
|
|
+ if (*skip)
|
|
+ {
|
|
+ buf += *skip;
|
|
+ bufsize -= *skip;
|
|
+ *skip = 0;
|
|
+ if (bufsize == 0)
|
|
+ return 1;
|
|
+ }
|
|
}
|
|
|
|
- if (out != NULL)
|
|
+ if (out)
|
|
fwrite (buf, 1, bufsize, out);
|
|
- if (out2 != NULL)
|
|
+ if (out2)
|
|
fwrite (buf, 1, bufsize, out2);
|
|
- *written += bufsize;
|
|
+
|
|
+ if (written)
|
|
+ *written += bufsize;
|
|
|
|
/* Immediately flush the downloaded data. This should not hinder
|
|
performance: fast downloads will arrive in large 16K chunks
|
|
@@ -203,17 +209,18 @@ write_data (FILE *out, FILE *out2, const char *buf, int bufsize,
|
|
actual justification. (Also, why 16K? Anyone test other values?)
|
|
*/
|
|
#ifndef __VMS
|
|
- if (out != NULL)
|
|
+ if (out)
|
|
fflush (out);
|
|
- if (out2 != NULL)
|
|
+ if (out2)
|
|
fflush (out2);
|
|
#endif /* ndef __VMS */
|
|
- if (out != NULL && ferror (out))
|
|
- return -1;
|
|
- else if (out2 != NULL && ferror (out2))
|
|
+
|
|
+ if (out && ferror (out))
|
|
return -2;
|
|
- else
|
|
- return 0;
|
|
+ else if (out2 && ferror (out2))
|
|
+ return -3;
|
|
+
|
|
+ return 0;
|
|
}
|
|
|
|
/* Read the contents of file descriptor FD until it the connection
|
|
@@ -452,6 +459,15 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
|
|
{
|
|
int err;
|
|
int towrite;
|
|
+
|
|
+ /* Write original data to WARC file */
|
|
+ write_res = write_data (NULL, out2, dlbuf, ret, NULL, NULL);
|
|
+ if (write_res < 0)
|
|
+ {
|
|
+ ret = write_res;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
gzstream.avail_in = ret;
|
|
gzstream.next_in = (unsigned char *) dlbuf;
|
|
|
|
@@ -482,11 +498,11 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
|
|
}
|
|
|
|
towrite = gzbufsize - gzstream.avail_out;
|
|
- write_res = write_data (out, out2, gzbuf, towrite, &skip,
|
|
+ write_res = write_data (out, NULL, gzbuf, towrite, &skip,
|
|
&sum_written);
|
|
if (write_res < 0)
|
|
{
|
|
- ret = (write_res == -3) ? -3 : -2;
|
|
+ ret = write_res;
|
|
goto out;
|
|
}
|
|
}
|
|
@@ -499,7 +515,7 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
|
|
&sum_written);
|
|
if (write_res < 0)
|
|
{
|
|
- ret = (write_res == -3) ? -3 : -2;
|
|
+ ret = write_res;
|
|
goto out;
|
|
}
|
|
}
|
|
--
|
|
2.19.1
|
|
|