From 417da4006cf5c97d44e74431b816fc58fec9e270 Mon Sep 17 00:00:00 2001 From: Eduard Bagdasaryan Date: Mon, 18 Mar 2019 17:48:21 +0000 Subject: [PATCH] Fix incremental parsing of chunked quoted extensions (#310) Before this change, incremental parsing of quoted chunked extensions was broken for two reasons: * Http::One::Parser::skipLineTerminator() unexpectedly threw after partially received quoted chunk extension value. * When Http::One::Tokenizer was unable to parse a quoted extension, it incorrectly restored the input buffer to the beginning of the extension value (instead of the extension itself), thus making further incremental parsing iterations impossible. IMO, the reason for this problem was that Http::One::Tokenizer::qdText() could not distinguish two cases (returning false in both): * the end of the quoted string not yet reached * an input error, e.g., wrong/unexpected character A possible approach could be to improve Http::One::Tokenizer, making it aware about "needs more data" state. However, to be acceptable, these improvements should be done in the base Parser::Tokenizer class instead. These changes seem to be non-trivial and could be done separately and later. Another approach, used here, is to simplify the complex and error-prone chunked extensions parsing algorithm, fixing incremental parsing bugs and still parse incrementally in almost all cases. The performance regression could be expected only in relatively rare cases of partially received or malformed extensions. Also: * fixed parsing of partial use-original-body extension values * do not treat an invalid use-original-body as an unknown extension * optimization: parse use-original-body extension only in ICAP context (i.e., where it is expected) * improvement: added a new API to TeChunkedParser to specify known chunked extensions list --- src/Debug.h | 4 ++++ src/parser/Makefile.am | 1 + src/parser/Tokenizer.cc | 40 ++++++++++++++++++++++++++++++++++++++++ src/parser/Tokenizer.h | 13 +++++++++++++ src/parser/forward.h | 22 ++++++++++++++++++++++ 5 files changed, 80 insertions(+) create mode 100644 src/parser/forward.h diff --git a/src/Debug.h b/src/Debug.h index 7fb1ed5..8c24175 100644 --- a/src/Debug.h +++ b/src/Debug.h @@ -99,6 +99,10 @@ public: /// configures the active debugging context to write syslog ALERT static void ForceAlert(); + + /// prefixes each grouped debugs() line after the first one in the group + static std::ostream& Extra(std::ostream &os) { return os << "\n "; } + private: static Context *Current; ///< deepest active context; nil outside debugs() }; diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index aef3235..c08d1d5 100644 --- a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -13,6 +13,7 @@ noinst_LTLIBRARIES = libparser.la libparser_la_SOURCES = \ BinaryTokenizer.h \ BinaryTokenizer.cc \ + forward.h \ Tokenizer.h \ Tokenizer.cc diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc index 99f8eb3..0b44e40 100644 --- a/src/parser/Tokenizer.cc +++ b/src/parser/Tokenizer.cc @@ -10,7 +10,9 @@ #include "squid.h" #include "Debug.h" +#include "parser/forward.h" #include "parser/Tokenizer.h" +#include "sbuf/Stream.h" #include #if HAVE_CTYPE_H @@ -96,6 +98,23 @@ Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, c return true; } +SBuf +Parser::Tokenizer::prefix(const char *description, const CharacterSet &tokenChars, const SBuf::size_type limit) +{ + if (atEnd()) + throw InsufficientInput(); + + SBuf result; + + if (!prefix(result, tokenChars, limit)) + throw TexcHere(ToSBuf("cannot parse ", description)); + + if (atEnd()) + throw InsufficientInput(); + + return result; +} + bool Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit) { @@ -283,3 +302,24 @@ Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf: return success(s - range.rawContent()); } +int64_t +Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit) +{ + if (atEnd()) + throw InsufficientInput(); + + int64_t result = 0; + + // Since we only support unsigned decimals, a parsing failure with a + // non-empty input always implies invalid/malformed input (or a buggy + // limit=0 caller). TODO: Support signed and non-decimal integers by + // refactoring int64() to detect insufficient input. + if (!int64(result, 10, false, limit)) + throw TexcHere(ToSBuf("cannot parse ", description)); + + if (atEnd()) + throw InsufficientInput(); // more digits may be coming + + return result; +} + diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h index f04fd3e..6ae8162 100644 --- a/src/parser/Tokenizer.h +++ b/src/parser/Tokenizer.h @@ -143,6 +143,19 @@ public: */ bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos); + /* + * The methods below mimic their counterparts documented above, but they + * throw on errors, including InsufficientInput. The field description + * parameter is used for error reporting and debugging. + */ + + /// prefix() wrapper but throws InsufficientInput if input contains + /// nothing but the prefix (i.e. if the prefix is not "terminated") + SBuf prefix(const char *description, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos); + + /// int64() wrapper but limited to unsigned decimal integers (for now) + int64_t udec64(const char *description, SBuf::size_type limit = SBuf::npos); + protected: SBuf consume(const SBuf::size_type n); SBuf::size_type success(const SBuf::size_type n); diff --git a/src/parser/forward.h b/src/parser/forward.h new file mode 100644 index 0000000..5a95b7a --- /dev/null +++ b/src/parser/forward.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 1996-2019 The Squid Software Foundation and contributors + * + * Squid software is distributed under GPLv2+ license and includes + * contributions from numerous individuals and organizations. + * Please see the COPYING and CONTRIBUTORS files for details. + */ + +#ifndef SQUID_PARSER_FORWARD_H +#define SQUID_PARSER_FORWARD_H + +namespace Parser { +class Tokenizer; +class BinaryTokenizer; + +// TODO: Move this declaration (to parser/Elements.h) if we need more like it. +/// thrown by modern "incremental" parsers when they need more data +class InsufficientInput {}; +} // namespace Parser + +#endif /* SQUID_PARSER_FORWARD_H */ + -- 2.23.0