196 lines
6.5 KiB
Diff
From 417da4006cf5c97d44e74431b816fc58fec9e270 Mon Sep 17 00:00:00 2001
|
|
From: Eduard Bagdasaryan <eduard.bagdasaryan@measurement-factory.com>
|
|
Date: Mon, 18 Mar 2019 17:48:21 +0000
|
|
Subject: [PATCH] Fix incremental parsing of chunked quoted extensions (#310)
|
|
|
|
Before this change, incremental parsing of quoted chunked extensions
|
|
was broken for two reasons:
|
|
|
|
* Http::One::Parser::skipLineTerminator() unexpectedly threw after
|
|
partially received quoted chunk extension value.
|
|
|
|
* When Http::One::Tokenizer was unable to parse a quoted extension,
|
|
it incorrectly restored the input buffer to the beginning of the
|
|
extension value (instead of the extension itself), thus making
|
|
further incremental parsing iterations impossible.
|
|
|
|
IMO, the reason for this problem was that Http::One::Tokenizer::qdText()
|
|
could not distinguish two cases (returning false in both):
|
|
|
|
* the end of the quoted string not yet reached
|
|
|
|
* an input error, e.g., wrong/unexpected character
|
|
|
|
A possible approach could be to improve Http::One::Tokenizer, making it
|
|
aware of a "needs more data" state. However, to be acceptable,
|
|
these improvements should be done in the base Parser::Tokenizer
|
|
class instead. These changes seem to be non-trivial and could be
|
|
done separately and later.
|
|
|
|
Another approach, used here, is to simplify the complex and error-prone
|
|
chunked extensions parsing algorithm, fixing incremental parsing bugs
|
|
and still parse incrementally in almost all cases. The performance
|
|
regression could be expected only in relatively rare cases of partially
|
|
received or malformed extensions.
|
|
|
|
Also:
|
|
* fixed parsing of partial use-original-body extension values
|
|
* do not treat an invalid use-original-body as an unknown extension
|
|
* optimization: parse use-original-body extension only in ICAP context
|
|
(i.e., where it is expected)
|
|
* improvement: added a new API to TeChunkedParser to specify known
|
|
chunked extensions list
|
|
---
|
|
src/Debug.h | 4 ++++
|
|
src/parser/Makefile.am | 1 +
|
|
src/parser/Tokenizer.cc | 40 ++++++++++++++++++++++++++++++++++++++++
|
|
src/parser/Tokenizer.h | 13 +++++++++++++
|
|
src/parser/forward.h | 22 ++++++++++++++++++++++
|
|
5 files changed, 80 insertions(+)
|
|
create mode 100644 src/parser/forward.h
|
|
|
|
diff --git a/src/Debug.h b/src/Debug.h
|
|
index 7fb1ed5..8c24175 100644
|
|
--- a/src/Debug.h
|
|
+++ b/src/Debug.h
|
|
@@ -99,6 +99,10 @@ public:
|
|
|
|
/// configures the active debugging context to write syslog ALERT
|
|
static void ForceAlert();
|
|
+
|
|
+ /// prefixes each grouped debugs() line after the first one in the group
|
|
+ static std::ostream& Extra(std::ostream &os) { return os << "\n "; }
|
|
+
|
|
private:
|
|
static Context *Current; ///< deepest active context; nil outside debugs()
|
|
};
|
|
diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am
|
|
index aef3235..c08d1d5 100644
|
|
--- a/src/parser/Makefile.am
|
|
+++ b/src/parser/Makefile.am
|
|
@@ -13,6 +13,7 @@ noinst_LTLIBRARIES = libparser.la
|
|
libparser_la_SOURCES = \
|
|
BinaryTokenizer.h \
|
|
BinaryTokenizer.cc \
|
|
+ forward.h \
|
|
Tokenizer.h \
|
|
Tokenizer.cc
|
|
|
|
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
|
|
index 99f8eb3..0b44e40 100644
|
|
--- a/src/parser/Tokenizer.cc
|
|
+++ b/src/parser/Tokenizer.cc
|
|
@@ -10,7 +10,9 @@
|
|
|
|
#include "squid.h"
|
|
#include "Debug.h"
|
|
+#include "parser/forward.h"
|
|
#include "parser/Tokenizer.h"
|
|
+#include "sbuf/Stream.h"
|
|
|
|
#include <cerrno>
|
|
#if HAVE_CTYPE_H
|
|
@@ -96,6 +98,23 @@ Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, c
|
|
return true;
|
|
}
|
|
|
|
+SBuf
|
|
+Parser::Tokenizer::prefix(const char *description, const CharacterSet &tokenChars, const SBuf::size_type limit)
|
|
+{
|
|
+ if (atEnd())
|
|
+ throw InsufficientInput();
|
|
+
|
|
+ SBuf result;
|
|
+
|
|
+ if (!prefix(result, tokenChars, limit))
|
|
+ throw TexcHere(ToSBuf("cannot parse ", description));
|
|
+
|
|
+ if (atEnd())
|
|
+ throw InsufficientInput();
|
|
+
|
|
+ return result;
|
|
+}
|
|
+
|
|
bool
|
|
Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
|
|
{
|
|
@@ -283,3 +302,24 @@ Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf:
|
|
return success(s - range.rawContent());
|
|
}
|
|
|
|
+int64_t
|
|
+Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit)
|
|
+{
|
|
+ if (atEnd())
|
|
+ throw InsufficientInput();
|
|
+
|
|
+ int64_t result = 0;
|
|
+
|
|
+ // Since we only support unsigned decimals, a parsing failure with a
|
|
+ // non-empty input always implies invalid/malformed input (or a buggy
|
|
+ // limit=0 caller). TODO: Support signed and non-decimal integers by
|
|
+ // refactoring int64() to detect insufficient input.
|
|
+ if (!int64(result, 10, false, limit))
|
|
+ throw TexcHere(ToSBuf("cannot parse ", description));
|
|
+
|
|
+ if (atEnd())
|
|
+ throw InsufficientInput(); // more digits may be coming
|
|
+
|
|
+ return result;
|
|
+}
|
|
+
|
|
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
|
|
index f04fd3e..6ae8162 100644
|
|
--- a/src/parser/Tokenizer.h
|
|
+++ b/src/parser/Tokenizer.h
|
|
@@ -143,6 +143,19 @@ public:
|
|
*/
|
|
bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos);
|
|
|
|
+ /*
|
|
+ * The methods below mimic their counterparts documented above, but they
|
|
+ * throw on errors, including InsufficientInput. The field description
|
|
+ * parameter is used for error reporting and debugging.
|
|
+ */
|
|
+
|
|
+ /// prefix() wrapper but throws InsufficientInput if input contains
|
|
+ /// nothing but the prefix (i.e. if the prefix is not "terminated")
|
|
+ SBuf prefix(const char *description, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos);
|
|
+
|
|
+ /// int64() wrapper but limited to unsigned decimal integers (for now)
|
|
+ int64_t udec64(const char *description, SBuf::size_type limit = SBuf::npos);
|
|
+
|
|
protected:
|
|
SBuf consume(const SBuf::size_type n);
|
|
SBuf::size_type success(const SBuf::size_type n);
|
|
diff --git a/src/parser/forward.h b/src/parser/forward.h
|
|
new file mode 100644
|
|
index 0000000..5a95b7a
|
|
--- /dev/null
|
|
+++ b/src/parser/forward.h
|
|
@@ -0,0 +1,22 @@
|
|
+/*
|
|
+ * Copyright (C) 1996-2019 The Squid Software Foundation and contributors
|
|
+ *
|
|
+ * Squid software is distributed under GPLv2+ license and includes
|
|
+ * contributions from numerous individuals and organizations.
|
|
+ * Please see the COPYING and CONTRIBUTORS files for details.
|
|
+ */
|
|
+
|
|
+#ifndef SQUID_PARSER_FORWARD_H
|
|
+#define SQUID_PARSER_FORWARD_H
|
|
+
|
|
+namespace Parser {
|
|
+class Tokenizer;
|
|
+class BinaryTokenizer;
|
|
+
|
|
+// TODO: Move this declaration (to parser/Elements.h) if we need more like it.
|
|
+/// thrown by modern "incremental" parsers when they need more data
|
|
+class InsufficientInput {};
|
|
+} // namespace Parser
|
|
+
|
|
+#endif /* SQUID_PARSER_FORWARD_H */
|
|
+
|
|
--
|
|
2.23.0
|