196 lines
6.5 KiB
Diff
From 417da4006cf5c97d44e74431b816fc58fec9e270 Mon Sep 17 00:00:00 2001
|
|
From: Eduard Bagdasaryan <eduard.bagdasaryan@measurement-factory.com>
|
|
Date: Mon, 18 Mar 2019 17:48:21 +0000
|
|
Subject: [PATCH] Fix incremental parsing of chunked quoted extensions (#310)
|
|
|
|
Before this change, incremental parsing of quoted chunked extensions
|
|
was broken for two reasons:
|
|
|
|
* Http::One::Parser::skipLineTerminator() unexpectedly threw after
|
|
partially received quoted chunk extension value.
|
|
|
|
* When Http::One::Tokenizer was unable to parse a quoted extension,
|
|
it incorrectly restored the input buffer to the beginning of the
|
|
extension value (instead of the extension itself), thus making
|
|
further incremental parsing iterations impossible.
|
|
|
|
IMO, the reason for this problem was that Http::One::Tokenizer::qdText()
|
|
could not distinguish two cases (returning false in both):
|
|
|
|
* the end of the quoted string not yet reached
|
|
|
|
* an input error, e.g., wrong/unexpected character
|
|
|
|
A possible approach could be to improve Http::One::Tokenizer, making it
|
|
aware of a "needs more data" state. However, to be acceptable,
|
|
these improvements should be done in the base Parser::Tokenizer
|
|
class instead. These changes seem to be non-trivial and could be
|
|
done separately and later.
|
|
|
|
Another approach, used here, is to simplify the complex and error-prone
|
|
chunked extensions parsing algorithm, fixing incremental parsing bugs
|
|
and still parse incrementally in almost all cases. The performance
|
|
regression could be expected only in relatively rare cases of partially
|
|
received or malformed extensions.
|
|
|
|
Also:
|
|
* fixed parsing of partial use-original-body extension values
|
|
* do not treat an invalid use-original-body as an unknown extension
|
|
* optimization: parse use-original-body extension only in ICAP context
|
|
(i.e., where it is expected)
|
|
* improvement: added a new API to TeChunkedParser to specify known
|
|
chunked extensions list
|
|
---
|
|
src/Debug.h | 4 ++++
|
|
src/parser/Makefile.am | 1 +
|
|
src/parser/Tokenizer.cc | 40 ++++++++++++++++++++++++++++++++++++++++
|
|
src/parser/Tokenizer.h | 13 +++++++++++++
|
|
src/parser/forward.h | 22 ++++++++++++++++++++++
|
|
5 files changed, 80 insertions(+)
|
|
create mode 100644 src/parser/forward.h
|
|
|
|
diff --git a/src/Debug.h b/src/Debug.h
|
|
index 7fb1ed5..8c24175 100644
|
|
--- a/src/Debug.h
|
|
+++ b/src/Debug.h
|
|
@@ -99,6 +99,10 @@ public:
|
|
|
|
/// configures the active debugging context to write syslog ALERT
|
|
static void ForceAlert();
|
|
+
|
|
+ /// prefixes each grouped debugs() line after the first one in the group
|
|
+ static std::ostream& Extra(std::ostream &os) { return os << "\n "; }
|
|
+
|
|
private:
|
|
static Context *Current; ///< deepest active context; nil outside debugs()
|
|
};
|
|
diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am
|
|
index aef3235..c08d1d5 100644
|
|
--- a/src/parser/Makefile.am
|
|
+++ b/src/parser/Makefile.am
|
|
@@ -13,6 +13,7 @@ noinst_LTLIBRARIES = libparser.la
|
|
libparser_la_SOURCES = \
|
|
BinaryTokenizer.h \
|
|
BinaryTokenizer.cc \
|
|
+ forward.h \
|
|
Tokenizer.h \
|
|
Tokenizer.cc
|
|
|
|
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
|
|
index 99f8eb3..0b44e40 100644
|
|
--- a/src/parser/Tokenizer.cc
|
|
+++ b/src/parser/Tokenizer.cc
|
|
@@ -10,7 +10,9 @@
|
|
|
|
#include "squid.h"
|
|
#include "Debug.h"
|
|
+#include "parser/forward.h"
|
|
#include "parser/Tokenizer.h"
|
|
+#include "sbuf/Stream.h"
|
|
|
|
#include <cerrno>
|
|
#if HAVE_CTYPE_H
|
|
@@ -96,6 +98,23 @@ Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, c
|
|
return true;
|
|
}
|
|
|
|
+SBuf
|
|
+Parser::Tokenizer::prefix(const char *description, const CharacterSet &tokenChars, const SBuf::size_type limit)
|
|
+{
|
|
+ if (atEnd())
|
|
+ throw InsufficientInput();
|
|
+
|
|
+ SBuf result;
|
|
+
|
|
+ if (!prefix(result, tokenChars, limit))
|
|
+ throw TexcHere(ToSBuf("cannot parse ", description));
|
|
+
|
|
+ if (atEnd())
|
|
+ throw InsufficientInput();
|
|
+
|
|
+ return result;
|
|
+}
|
|
+
|
|
bool
|
|
Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
|
|
{
|
|
@@ -283,3 +302,24 @@ Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf:
|
|
return success(s - range.rawContent());
|
|
}
|
|
|
|
+int64_t
|
|
+Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit)
|
|
+{
|
|
+ if (atEnd())
|
|
+ throw InsufficientInput();
|
|
+
|
|
+ int64_t result = 0;
|
|
+
|
|
+ // Since we only support unsigned decimals, a parsing failure with a
|
|
+ // non-empty input always implies invalid/malformed input (or a buggy
|
|
+ // limit=0 caller). TODO: Support signed and non-decimal integers by
|
|
+ // refactoring int64() to detect insufficient input.
|
|
+ if (!int64(result, 10, false, limit))
|
|
+ throw TexcHere(ToSBuf("cannot parse ", description));
|
|
+
|
|
+ if (atEnd())
|
|
+ throw InsufficientInput(); // more digits may be coming
|
|
+
|
|
+ return result;
|
|
+}
|
|
+
|
|
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
|
|
index f04fd3e..6ae8162 100644
|
|
--- a/src/parser/Tokenizer.h
|
|
+++ b/src/parser/Tokenizer.h
|
|
@@ -143,6 +143,19 @@ public:
|
|
*/
|
|
bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos);
|
|
|
|
+ /*
|
|
+ * The methods below mimic their counterparts documented above, but they
|
|
+ * throw on errors, including InsufficientInput. The field description
|
|
+ * parameter is used for error reporting and debugging.
|
|
+ */
|
|
+
|
|
+ /// prefix() wrapper but throws InsufficientInput if input contains
|
|
+ /// nothing but the prefix (i.e. if the prefix is not "terminated")
|
|
+ SBuf prefix(const char *description, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos);
|
|
+
|
|
+ /// int64() wrapper but limited to unsigned decimal integers (for now)
|
|
+ int64_t udec64(const char *description, SBuf::size_type limit = SBuf::npos);
|
|
+
|
|
protected:
|
|
SBuf consume(const SBuf::size_type n);
|
|
SBuf::size_type success(const SBuf::size_type n);
|
|
diff --git a/src/parser/forward.h b/src/parser/forward.h
|
|
new file mode 100644
|
|
index 0000000..5a95b7a
|
|
--- /dev/null
|
|
+++ b/src/parser/forward.h
|
|
@@ -0,0 +1,22 @@
|
|
+/*
|
|
+ * Copyright (C) 1996-2019 The Squid Software Foundation and contributors
|
|
+ *
|
|
+ * Squid software is distributed under GPLv2+ license and includes
|
|
+ * contributions from numerous individuals and organizations.
|
|
+ * Please see the COPYING and CONTRIBUTORS files for details.
|
|
+ */
|
|
+
|
|
+#ifndef SQUID_PARSER_FORWARD_H
|
|
+#define SQUID_PARSER_FORWARD_H
|
|
+
|
|
+namespace Parser {
|
|
+class Tokenizer;
|
|
+class BinaryTokenizer;
|
|
+
|
|
+// TODO: Move this declaration (to parser/Elements.h) if we need more like it.
|
|
+/// thrown by modern "incremental" parsers when they need more data
|
|
+class InsufficientInput {};
|
|
+} // namespace Parser
|
|
+
|
|
+#endif /* SQUID_PARSER_FORWARD_H */
|
|
+
|
|
--
|
|
2.23.0
|