diff --git a/CVE-2022-36033.patch b/CVE-2022-36033.patch new file mode 100644 index 0000000..bfcecf8 --- /dev/null +++ b/CVE-2022-36033.patch @@ -0,0 +1,99 @@ +From 4ea768d96b3d232e63edef9594766d44597b3882 Mon Sep 17 00:00:00 2001 +From: Jonathan Hedley +Date: Sun, 21 Aug 2022 14:04:56 +1000 +Subject: [PATCH] Strip control characters from URLs when resolving absolute + URLs + +--- + .../java/org/jsoup/internal/StringUtil.java | 10 +++++++++- + .../org/jsoup/internal/StringUtilTest.java | 9 +++++++++ + .../java/org/jsoup/safety/CleanerTest.java | 18 ++++++++++++++++++ + 3 files changed, 36 insertions(+), 1 deletion(-) + +diff --git a/src/main/java/org/jsoup/internal/StringUtil.java b/src/main/java/org/jsoup/internal/StringUtil.java +index 0835225..608e96d 100644 +--- a/src/main/java/org/jsoup/internal/StringUtil.java ++++ b/src/main/java/org/jsoup/internal/StringUtil.java +@@ -269,6 +269,7 @@ public final class StringUtil { + * @throws MalformedURLException if an error occurred generating the URL + */ + public static URL resolve(URL base, String relUrl) throws MalformedURLException { ++ relUrl = stripControlChars(relUrl); + // workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired + if (relUrl.startsWith("?")) + relUrl = base.getPath() + relUrl; +@@ -287,7 +288,9 @@ public final class StringUtil { + * @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned) + * @return an absolute URL if one was able to be generated, or the empty string if not + */ +- public static String resolve(final String baseUrl, final String relUrl) { ++ public static String resolve(String baseUrl, String relUrl) { ++ // workaround: java will allow control chars in a path URL and may treat as relative, but Chrome / Firefox will strip and may see as a scheme. Normalize to browser's view. 
++ baseUrl = stripControlChars(baseUrl); relUrl = stripControlChars(relUrl); + try { + URL base; + try { +@@ -306,6 +309,11 @@ public final class StringUtil { + } + private static final Pattern validUriScheme = Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]*:"); + ++ private static final Pattern controlChars = Pattern.compile("[\\x00-\\x1f]*"); // matches ascii 0 - 31, to strip from url ++ private static String stripControlChars(final String input) { ++ return controlChars.matcher(input).replaceAll(""); ++ } ++ + private static final ThreadLocal> threadLocalBuilders = new ThreadLocal>() { + @Override + protected Stack initialValue() { +diff --git a/src/test/java/org/jsoup/internal/StringUtilTest.java b/src/test/java/org/jsoup/internal/StringUtilTest.java +index 1956084..9ffcec9 100644 +--- a/src/test/java/org/jsoup/internal/StringUtilTest.java ++++ b/src/test/java/org/jsoup/internal/StringUtilTest.java +@@ -120,6 +120,15 @@ public class StringUtilTest { + assertEquals("http://example.com/b/c/g#s/../x", resolve("http://example.com/b/c/d;p?q", "g#s/../x")); + } + ++ @Test void stripsControlCharsFromUrls() { ++ // should resolve to an absolute url: ++ assertEquals("foo:bar", resolve("\nhttps://\texample.com/", "\r\nfo\to:ba\br")); ++ } ++ ++ @Test void allowsSpaceInUrl() { ++ assertEquals("https://example.com/foo bar/", resolve("HTTPS://example.com/example/", "../foo bar/")); ++ } ++ + @Test + void isAscii() { + assertTrue(StringUtil.isAscii("")); +diff --git a/src/test/java/org/jsoup/safety/CleanerTest.java b/src/test/java/org/jsoup/safety/CleanerTest.java +index 3338054..0e62f17 100644 +--- a/src/test/java/org/jsoup/safety/CleanerTest.java ++++ b/src/test/java/org/jsoup/safety/CleanerTest.java +@@ -309,6 +309,24 @@ public class CleanerTest { + assertEquals("Clean", clean); + } + ++ @Test void dropsConcealedJavascriptProtocolWhenRelativesLinksEnabled() { ++ Safelist safelist = Safelist.basic().preserveRelativeLinks(true); ++ String html = "Link"; ++ String clean = 
Jsoup.clean(html, "https://", safelist); ++ assertEquals("Link", clean); ++ ++ String colon = "Link"; ++ String cleanColon = Jsoup.clean(colon, "https://", safelist); ++ assertEquals("Link", cleanColon); ++ } ++ ++ @Test void dropsConcealedJavascriptProtocolWhenRelativesLinksDisabled() { ++ Safelist safelist = Safelist.basic().preserveRelativeLinks(false); ++ String html = "Link"; ++ String clean = Jsoup.clean(html, "https://", safelist); ++ assertEquals("Link", clean); ++ } ++ + @Test public void handlesNoHrefAttribute() { + String dirty = "One Two"; + Safelist relaxedWithAnchor = Safelist.relaxed().addProtocols("a", "href", "#"); +-- +2.33.0 + diff --git a/jsoup.spec b/jsoup.spec index ec3948b..24520db 100644 --- a/jsoup.spec +++ b/jsoup.spec @@ -1,10 +1,12 @@ Name: jsoup Version: 1.14.2 -Release: 1 +Release: 2 Summary: Java HTML Parser License: MIT URL: http://jsoup.org/ Source0: https://github.com/jhy/jsoup/archive/refs/tags/jsoup-%{version}.tar.gz +# https://github.com/jhy/jsoup/commit/4ea768d96b3d232e63edef9594766d44597b3882 +Patch0: CVE-2022-36033.patch BuildArch: noarch BuildRequires: maven-local, mvn(org.apache.felix:maven-bundle-plugin) @@ -34,6 +36,9 @@ for extracting and manipulating data, using the best of DOM, CSS, and jquery-lik %{_javadocdir}/%{name}/* %changelog +* Mon Mar 04 2024 yaoxin - 1.14.2-2 +- Fix CVE-2022-36033 + * Fri Sep 3 2021 houyingchao - 1.14.2-1 - Upgrade to 1.14.2