qt6-qtbase/qtbase6.5.2-CVE-2023-38197.patch

372 lines
13 KiB
Diff
Raw Normal View History

2025-05-18 23:29:18 +00:00
From 49a4cf8133eb8d76115fb7827bd76764e1b30d12 Mon Sep 17 00:00:00 2001
From: peijiankang <peijiankang@kylinos.cn>
Date: Wed, 29 Nov 2023 20:51:35 +0800
Subject: [PATCH] CVE-2023-38197
---
src/corelib/serialization/qxmlstream.cpp | 144 +++++++++++++++++-
src/corelib/serialization/qxmlstream_p.h | 11 ++
.../qxmlstream/tokenError/dtdInBody.xml | 21 +++
.../qxmlstream/tokenError/multipleDtd.xml | 21 +++
.../qxmlstream/tokenError/wellFormed.xml | 16 ++
.../qxmlstream/tst_qxmlstream.cpp | 38 +++++
6 files changed, 243 insertions(+), 8 deletions(-)
create mode 100644 tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml
create mode 100644 tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml
create mode 100644 tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml
diff --git a/src/corelib/serialization/qxmlstream.cpp b/src/corelib/serialization/qxmlstream.cpp
index 3175517a..7dcf80c3 100644
--- a/src/corelib/serialization/qxmlstream.cpp
+++ b/src/corelib/serialization/qxmlstream.cpp
@@ -185,7 +185,7 @@ WRAP(indexOf, QLatin1StringView)
addData() or by waiting for it to arrive on the device().
\value UnexpectedElementError The parser encountered an element
- that was different to those it expected.
+ or token that was different to those it expected.
*/
@@ -322,13 +322,34 @@ QXmlStreamEntityResolver *QXmlStreamReader::entityResolver() const
QXmlStreamReader is a well-formed XML 1.0 parser that does \e not
include external parsed entities. As long as no error occurs, the
- application code can thus be assured that the data provided by the
- stream reader satisfies the W3C's criteria for well-formed XML. For
- example, you can be certain that all tags are indeed nested and
- closed properly, that references to internal entities have been
- replaced with the correct replacement text, and that attributes have
- been normalized or added according to the internal subset of the
- DTD.
+ application code can thus be assured, that
+ \list
+ \li the data provided by the stream reader satisfies the W3C's
+ criteria for well-formed XML,
+ \li tokens are provided in a valid order.
+ \endlist
+
+ Unless QXmlStreamReader raises an error, it guarantees the following:
+ \list
+ \li All tags are nested and closed properly.
+ \li References to internal entities have been replaced with the
+ correct replacement text.
+ \li Attributes have been normalized or added according to the
+ internal subset of the \l DTD.
+ \li Tokens of type \l StartDocument happen before all others,
+ aside from comments and processing instructions.
+ \li At most one DOCTYPE element (a token of type \l DTD) is present.
+ \li If present, the DOCTYPE appears before all other elements,
+ aside from StartDocument, comments and processing instructions.
+ \endlist
+
+ In particular, once any token of type \l StartElement, \l EndElement,
+ \l Characters, \l EntityReference or \l EndDocument is seen, no
+ tokens of type StartDocument or DTD will be seen. If one is present in
+ the input stream, out of order, an error is raised.
+
+ \note The token types \l Comment and \l ProcessingInstruction may appear
+ anywhere in the stream.
If an error occurs while parsing, atEnd() and hasError() return
true, and error() returns the error that occurred. The functions
@@ -659,6 +680,7 @@ QXmlStreamReader::TokenType QXmlStreamReader::readNext()
d->token = -1;
return readNext();
}
+ d->checkToken();
return d->type;
}
@@ -743,6 +765,11 @@ static constexpr auto QXmlStreamReader_tokenTypeString = qOffsetStringArray(
"ProcessingInstruction"
);
+static constexpr auto QXmlStreamReader_XmlContextString = qOffsetStringArray(
+ "Prolog",
+ "Body"
+);
+
/*!
\property QXmlStreamReader::namespaceProcessing
\brief the namespace-processing flag of the stream reader.
@@ -777,6 +804,15 @@ QString QXmlStreamReader::tokenString() const
return QLatin1StringView(QXmlStreamReader_tokenTypeString.at(d->type));
}
+/*!
+ \internal
+ \return \param loc (Prolog/Body) as a string.
+ */
+static constexpr QLatin1StringView contextString(QXmlStreamReaderPrivate::XmlContext ctxt)
+{
+ return QLatin1StringView(QXmlStreamReader_XmlContextString.at(static_cast<int>(ctxt)));
+}
+
#endif // QT_NO_XMLSTREAMREADER
QXmlStreamPrivateTagStack::QXmlStreamPrivateTagStack()
@@ -864,6 +900,8 @@ void QXmlStreamReaderPrivate::init()
type = QXmlStreamReader::NoToken;
error = QXmlStreamReader::NoError;
+ currentContext = XmlContext::Prolog;
+ foundDTD = false;
}
/*
@@ -3838,6 +3876,96 @@ void QXmlStreamWriter::writeCurrentToken(const QXmlStreamReader &reader)
break;
}
}
+static constexpr bool isTokenAllowedInContext(QXmlStreamReader::TokenType type,
+ QXmlStreamReaderPrivate::XmlContext loc)
+{
+ switch (type) {
+ case QXmlStreamReader::StartDocument:
+ case QXmlStreamReader::DTD:
+ return loc == QXmlStreamReaderPrivate::XmlContext::Prolog;
+
+ case QXmlStreamReader::StartElement:
+ case QXmlStreamReader::EndElement:
+ case QXmlStreamReader::Characters:
+ case QXmlStreamReader::EntityReference:
+ case QXmlStreamReader::EndDocument:
+ return loc == QXmlStreamReaderPrivate::XmlContext::Body;
+
+ case QXmlStreamReader::Comment:
+ case QXmlStreamReader::ProcessingInstruction:
+ return true;
+
+ case QXmlStreamReader::NoToken:
+ case QXmlStreamReader::Invalid:
+ return false;
+ }
+
+ // GCC 8.x does not treat __builtin_unreachable() as constexpr
+#if !defined(Q_CC_GNU_ONLY) || (Q_CC_GNU >= 900)
+ Q_UNREACHABLE_RETURN(false);
+#else
+ return false;
+#endif
+}
+
+/*!
+ \internal
+ \brief QXmlStreamReader::isValidToken
+ \return \c true if \param type is a valid token type.
+ \return \c false if \param type is an unexpected token,
+ which indicates a non-well-formed or invalid XML stream.
+ */
+bool QXmlStreamReaderPrivate::isValidToken(QXmlStreamReader::TokenType type)
+{
+ // Don't change currentContext, if Invalid or NoToken occur in the prolog
+ if (type == QXmlStreamReader::Invalid || type == QXmlStreamReader::NoToken)
+ return false;
+
+ // If a token type gets rejected in the body, there is no recovery
+ const bool result = isTokenAllowedInContext(type, currentContext);
+ if (result || currentContext == XmlContext::Body)
+ return result;
+
+ // First non-Prolog token observed => switch context to body and check again.
+ currentContext = XmlContext::Body;
+ return isTokenAllowedInContext(type, currentContext);
+}
+
+/*!
+ \internal
+ Checks token type and raises an error, if it is invalid
+ in the current context (prolog/body).
+ */
+void QXmlStreamReaderPrivate::checkToken()
+{
+ Q_Q(QXmlStreamReader);
+
+ // The token type must be consumed, to keep track if the body has been reached.
+ const XmlContext context = currentContext;
+ const bool ok = isValidToken(type);
+
+ // Do nothing if an error has been raised already (going along with an unexpected token)
+ if (error != QXmlStreamReader::Error::NoError)
+ return;
+
+ if (!ok) {
+ raiseError(QXmlStreamReader::UnexpectedElementError,
+ QObject::tr("Unexpected token type %1 in %2.")
+ .arg(q->tokenString(), contextString(context)));
+ return;
+ }
+
+ if (type != QXmlStreamReader::DTD)
+ return;
+
+ // Raise error on multiple DTD tokens
+ if (foundDTD) {
+ raiseError(QXmlStreamReader::UnexpectedElementError,
+ QObject::tr("Found second DTD token in %1.").arg(contextString(context)));
+ } else {
+ foundDTD = true;
+ }
+}
/*!
\fn bool QXmlStreamAttributes::hasAttribute(QAnyStringView qualifiedName) const
diff --git a/src/corelib/serialization/qxmlstream_p.h b/src/corelib/serialization/qxmlstream_p.h
index 7c46d187..f805cedb 100644
--- a/src/corelib/serialization/qxmlstream_p.h
+++ b/src/corelib/serialization/qxmlstream_p.h
@@ -296,6 +296,17 @@ public:
QStringDecoder decoder;
bool atEnd;
+ enum class XmlContext
+ {
+ Prolog,
+ Body,
+ };
+
+ XmlContext currentContext = XmlContext::Prolog;
+ bool foundDTD = false;
+ bool isValidToken(QXmlStreamReader::TokenType type);
+ void checkToken();
+
/*!
\sa setType()
*/
diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml
new file mode 100644
index 00000000..68ef2962
--- /dev/null
+++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml
@@ -0,0 +1,21 @@
+<!DOCTYPE TEST [
+ <!ELEMENT TESTATTRIBUTE (CASE+)>
+ <!ELEMENT CASE (CLASS, FUNCTION)>
+ <!ELEMENT CLASS (#PCDATA)>
+
+ <!-- adding random ENTITY statement, as this is typical DTD content -->
+ <!ENTITY unite "&#x222a;">
+
+ <!ATTLIST CASE CLASS CDATA #REQUIRED>
+]>
+<TEST>
+ <CASE>
+ <CLASS>tst_QXmlStream</CLASS>
+ </CASE>
+ <!-- invalid DTD in XML body follows -->
+ <!DOCTYPE DTDTEST [
+ <!ELEMENT RESULT (CASE+)>
+ <!ATTLIST RESULT OUTPUT CDATA #REQUIRED>
+ ]>
+</TEST>
+
diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml
new file mode 100644
index 00000000..1dbe75c4
--- /dev/null
+++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml
@@ -0,0 +1,21 @@
+<!DOCTYPE TEST [
+ <!ELEMENT TESTATTRIBUTE (CASE+)>
+ <!ELEMENT CASE (CLASS, FUNCTION, DATASET, COMMENTS)>
+ <!ELEMENT CLASS (#PCDATA)>
+
+ <!-- adding random ENTITY statements, as this is typical DTD content -->
+ <!ENTITY iff "&hArr;">
+
+ <!ATTLIST CASE CLASS CDATA #REQUIRED>
+]>
+<!-- invalid second DTD follows -->
+<!DOCTYPE SECOND [
+ <!ELEMENT SECONDATTRIBUTE (#PCDATA)>
+ <!ENTITY on "&#8728;">
+]>
+<TEST>
+ <CASE>
+ <CLASS>tst_QXmlStream</CLASS>
+ </CASE>
+</TEST>
+
diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml
new file mode 100644
index 00000000..9dfbc0f9
--- /dev/null
+++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml
@@ -0,0 +1,16 @@
+<!DOCTYPE TEST [
+ <!ELEMENT TESTATTRIBUTE (CASE+)>
+ <!ELEMENT CASE (CLASS, FUNCTION, DATASET, COMMENTS)>
+ <!ELEMENT CLASS (#PCDATA)>
+
+ <!-- adding random ENTITY statements, as this is typical DTD content -->
+ <!ENTITY unite "&#x222a;">
+
+ <!ATTLIST CASE CLASS CDATA #REQUIRED>
+]>
+<TEST>
+ <CASE>
+ <CLASS>tst_QXmlStream</CLASS>
+ </CASE>
+</TEST>
+
diff --git a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
index b7f603c7..839d9edc 100644
--- a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
+++ b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
@@ -590,6 +590,8 @@ private slots:
void entityExpansionLimit() const;
+ void tokenErrorHandling_data() const;
+ void tokenErrorHandling() const;
private:
static QByteArray readFile(const QString &filename);
@@ -1818,6 +1820,42 @@ void tst_QXmlStream::roundTrip() const
QCOMPARE(out, in);
}
+void tst_QXmlStream::tokenErrorHandling_data() const
+{
+ QTest::addColumn<QString>("fileName");
+ QTest::addColumn<QXmlStreamReader::Error>("expectedError");
+ QTest::addColumn<QString>("errorKeyWord");
+
+ constexpr auto invalid = QXmlStreamReader::Error::UnexpectedElementError;
+ constexpr auto valid = QXmlStreamReader::Error::NoError;
+ QTest::newRow("DtdInBody") << "dtdInBody.xml" << invalid << "DTD";
+ QTest::newRow("multipleDTD") << "multipleDtd.xml" << invalid << "second DTD";
+ QTest::newRow("wellFormed") << "wellFormed.xml" << valid << "";
+}
+
+void tst_QXmlStream::tokenErrorHandling() const
+{
+ QFETCH(const QString, fileName);
+ QFETCH(const QXmlStreamReader::Error, expectedError);
+ QFETCH(const QString, errorKeyWord);
+
+ const QDir dir(QFINDTESTDATA("tokenError"));
+ QFile file(dir.absoluteFilePath(fileName));
+
+ // Cross-compiling: File will be on host only
+ if (!file.exists())
+ QSKIP("Testfile not found.");
+
+ file.open(QIODevice::ReadOnly);
+ QXmlStreamReader reader(&file);
+ while (!reader.atEnd())
+ reader.readNext();
+
+ QCOMPARE(reader.error(), expectedError);
+ if (expectedError != QXmlStreamReader::Error::NoError)
+ QVERIFY(reader.errorString().contains(errorKeyWord));
+}
+
void tst_QXmlStream::test_fastScanName_data() const
{
QTest::addColumn<QByteArray>("data");
--
2.41.0