From 15293f1243e1dd4756ffc1d13d5a8ea49167174f Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Wed, 19 Jul 2023 16:59:11 -0600 Subject: [PATCH] (#996): Fix arbitrary file read when href has special characters In UrlResolver::resolve_href() we now explicitly disallow URLs that have a query string ("?") or a fragment identifier ("#"). We also explicitly check for a base URL and not resolving to a path, for example, "file:///base/foo.svg" + "." would resolve to "file:///base/" - this is technically correct, but we don't want to resolve to directories. Also, we pass a canonicalized path name as a URL upstream, so that g_file_new_from_url() will consume it later, instead of passing the original and potentially malicious URL. Fixes https://gitlab.gnome.org/GNOME/librsvg/-/issues/996 Reference:https://gitlab.gnome.org/GNOME/librsvg/-/commit/15293f1243e1dd4756ffc1d13d5a8ea49167174f Conflict:Adaptation Context --- include/librsvg/rsvg.h | 12 +- src/error.rs | 27 ++- src/lib.rs | 12 +- src/url_resolver.rs | 183 ++++++++++++++---- tests/Makefile.am | 2 + tests/fixtures/loading/bar.svg | 1 + tests/fixtures/loading/disallowed-996-ref.svg | 10 + tests/fixtures/loading/disallowed-996.svg | 12 ++ tests/fixtures/loading/foo.svg | 1 + tests/fixtures/loading/subdir/baz.svg | 1 + tests/src/loading_disallowed.rs | 7 + tests/src/main.rs | 3 + 12 files changed, 213 insertions(+), 58 deletions(-) create mode 100644 tests/fixtures/loading/bar.svg create mode 100644 tests/fixtures/loading/disallowed-996-ref.svg create mode 100644 tests/fixtures/loading/disallowed-996.svg create mode 100644 tests/fixtures/loading/foo.svg create mode 100644 tests/fixtures/loading/subdir/baz.svg create mode 100644 tests/src/loading_disallowed.rs diff --git a/include/librsvg/rsvg.h b/include/librsvg/rsvg.h index 964002354..fdea8c246 100644 --- a/include/librsvg/rsvg.h +++ b/include/librsvg/rsvg.h @@ -132,28 +132,30 @@ GType rsvg_error_get_type (void); * 1. All `data:` URLs may be loaded. These are sometimes used * to include raster image data, encoded as base-64, directly in an SVG file. * - * 2. All other URL schemes in references require a base URL. For + * 2. URLs with queries ("?") or fragment identifiers ("#") are not allowed. + * + * 3. All URL schemes other than data: in references require a base URL. For * example, this means that if you load an SVG with * [ctor@Rsvg.Handle.new_from_data] without calling [method@Rsvg.Handle.set_base_uri], * then any referenced files will not be allowed (e.g. raster images to be * loaded from other files will not work). * - * 3. If referenced URLs are absolute, rather than relative, then they must + * 4. If referenced URLs are absolute, rather than relative, then they must * have the same scheme as the base URL. For example, if the base URL has a * `file` scheme, then all URL references inside the SVG must * also have the `file` scheme, or be relative references which * will be resolved against the base URL. * - * 4. If referenced URLs have a `resource` scheme, that is, + * 5. If referenced URLs have a `resource` scheme, that is, * if they are included into your binary program with GLib's resource * mechanism, they are allowed to be loaded (provided that the base URL is * also a `resource`, per the previous rule). * - * 5. Otherwise, non-`file` schemes are not allowed. For + * 6. Otherwise, non-`file` schemes are not allowed. For * example, librsvg will not load `http` resources, to keep * malicious SVG data from "phoning home". * - * 6. A relative URL must resolve to the same directory as the base URL, or to + * 7. A relative URL must resolve to the same directory as the base URL, or to * one of its subdirectories. Librsvg will canonicalize filenames, by * removing ".." path components and resolving symbolic links, to decide whether * files meet these conditions. diff --git a/src/error.rs b/src/error.rs index 1ca9bf0cc..acdab22b6 100644 --- a/src/error.rs +++ b/src/error.rs @@ -313,6 +313,12 @@ pub enum AllowedUrlError { /// or in one directory below the base file. NotSiblingOrChildOfBaseFile, + /// Loaded file:// URLs cannot have a query part, e.g. `file:///foo?blah` + NoQueriesAllowed, + + /// URLs may not have fragment identifiers at this stage + NoFragmentIdentifierAllowed, + /// Error when obtaining the file path or the base file path InvalidPath, @@ -325,17 +331,18 @@ pub enum AllowedUrlError { impl fmt::Display for AllowedUrlError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use AllowedUrlError::*; match *self { - AllowedUrlError::UrlParseError(e) => write!(f, "URL parse error: {}", e), - AllowedUrlError::BaseRequired => write!(f, "base required"), - AllowedUrlError::DifferentUriSchemes => write!(f, "different URI schemes"), - AllowedUrlError::DisallowedScheme => write!(f, "disallowed scheme"), - AllowedUrlError::NotSiblingOrChildOfBaseFile => { - write!(f, "not sibling or child of base file") - } - AllowedUrlError::InvalidPath => write!(f, "invalid path"), - AllowedUrlError::BaseIsRoot => write!(f, "base is root"), - AllowedUrlError::CanonicalizationError => write!(f, "canonicalization error"), + UrlParseError(e) => write!(f, "URL parse error: {e}"), + BaseRequired => write!(f, "base required"), + DifferentUriSchemes => write!(f, "different URI schemes"), + DisallowedScheme => write!(f, "disallowed scheme"), + NotSiblingOrChildOfBaseFile => write!(f, "not sibling or child of base file"), + NoQueriesAllowed => write!(f, "no queries allowed"), + NoFragmentIdentifierAllowed => write!(f, "no fragment identifier allowed"), + InvalidPath => write!(f, "invalid path"), + BaseIsRoot => write!(f, "base is root"), + CanonicalizationError => write!(f, "canonicalization error"), } } } diff --git a/src/lib.rs b/src/lib.rs index 3dfb39f46..140a6cc89 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,28 +105,30 @@ //! include raster image data, encoded as base-64, directly in an SVG //! file. //! -//! 2. All other URL schemes in references require a base URL. For +//! 2. URLs with queries ("?") or fragment identifiers ("#") are not allowed. +//! +//! 3. All URL schemes other than data: in references require a base URL. For //! example, this means that if you load an SVG with [`Loader::read_stream`] //! without providing a `base_file`, then any referenced files will not //! be allowed (e.g. raster images to be loaded from other files will //! not work). //! -//! 3. If referenced URLs are absolute, rather than relative, then +//! 4. If referenced URLs are absolute, rather than relative, then //! they must have the same scheme as the base URL. For example, if //! the base URL has a "`file`" scheme, then all URL references inside //! the SVG must also have the "`file`" scheme, or be relative //! references which will be resolved against the base URL. //! -//! 4. If referenced URLs have a "`resource`" scheme, that is, if they +//! 5. If referenced URLs have a "`resource`" scheme, that is, if they //! are included into your binary program with GLib's resource //! mechanism, they are allowed to be loaded (provided that the base //! URL is also a "`resource`", per the previous rule). //! -//! 5. Otherwise, non-`file` schemes are not allowed. For example, +//! 6. Otherwise, non-`file` schemes are not allowed. For example, //! librsvg will not load `http` resources, to keep malicious SVG data //! from "phoning home". //! -//! 6. A relative URL must resolve to the same directory as the base +//! 7. A relative URL must resolve to the same directory as the base //! URL, or to one of its subdirectories. Librsvg will canonicalize //! filenames, by removing "`..`" path components and resolving symbolic //! links, to decide whether files meet these conditions. diff --git a/src/url_resolver.rs b/src/url_resolver.rs index 4ec9c07c..df8f68f8 100644 --- a/src/url_resolver.rs +++ b/src/url_resolver.rs @@ -1,13 +1,13 @@ //! Determine which URLs are allowed for loading. use std::fmt; -use std::io; use std::ops::Deref; -use std::path::{Path, PathBuf}; use url::Url; use crate::error::AllowedUrlError; +/// Decides which URLs are allowed to be loaded. +/// /// Currently only contains the base URL. /// /// The plan is to add: @@ -29,6 +29,11 @@ impl UrlResolver { UrlResolver { base_url } } + /// Decides which URLs are allowed to be loaded based on the presence of a base URL. + /// + /// This function implements the policy described in "Security and locations of + /// referenced files" in the [crate + /// documentation](index.html#security-and-locations-of-referenced-files). pub fn resolve_href(&self, href: &str) -> Result { let url = Url::options() .base_url(self.base_url.as_ref()) @@ -40,6 +45,17 @@ impl UrlResolver { return Ok(AllowedUrl(url)); } + // Queries are not allowed. + if url.query().is_some() { + return Err(AllowedUrlError::NoQueriesAllowed); + } + + // Fragment identifiers are not allowed. They should have been stripped + // upstream, by NodeId. + if url.fragment().is_some() { + return Err(AllowedUrlError::NoFragmentIdentifierAllowed); + } + // All other sources require a base url if self.base_url.is_none() { return Err(AllowedUrlError::BaseRequired); @@ -62,6 +78,26 @@ impl UrlResolver { return Err(AllowedUrlError::DisallowedScheme); } + // The rest of this function assumes file: URLs; guard against + // incorrect refactoring. + assert!(url.scheme() == "file"); + + // If we have a base_uri of "file:///foo/bar.svg", and resolve an href of ".", + // Url.parse() will give us "file:///foo/". We don't want that, so check + // if the last path segment is empty - it will not be empty for a normal file. + + if let Some(segments) = url.path_segments() { + if segments + .last() + .expect("URL path segments always contain at last 1 element") + .is_empty() + { + return Err(AllowedUrlError::NotSiblingOrChildOfBaseFile); + } + } else { + unreachable!("the file: URL cannot have an empty path"); + } + // We have two file: URIs. Now canonicalize them (remove .. and symlinks, etc.) // and see if the directories match @@ -79,13 +115,17 @@ impl UrlResolver { let base_parent = base_parent.unwrap(); - let url_canon = - canonicalize(&url_path).map_err(|_| AllowedUrlError::CanonicalizationError)?; - let parent_canon = - canonicalize(base_parent).map_err(|_| AllowedUrlError::CanonicalizationError)?; - - if url_canon.starts_with(parent_canon) { - Ok(AllowedUrl(url)) + let path_canon = url_path + .canonicalize() + .map_err(|_| AllowedUrlError::CanonicalizationError)?; + let parent_canon = base_parent + .canonicalize() + .map_err(|_| AllowedUrlError::CanonicalizationError)?; + + if path_canon.starts_with(parent_canon) { + // Finally, convert the canonicalized path back to a URL. + let path_to_url = Url::from_file_path(path_canon).unwrap(); + Ok(AllowedUrl(path_to_url)) } else { Err(AllowedUrlError::NotSiblingOrChildOfBaseFile) } @@ -116,21 +156,12 @@ impl fmt::Display for AllowedUrl { } } -// For tests, we don't want to touch the filesystem. In that case, -// assume that we are being passed canonical file names. -#[cfg(not(test))] -fn canonicalize>(path: P) -> Result { - path.as_ref().canonicalize() -} -#[cfg(test)] -fn canonicalize>(path: P) -> Result { - Ok(path.as_ref().to_path_buf()) -} - #[cfg(test)] mod tests { use super::*; + use std::path::PathBuf; + #[test] fn disallows_relative_file_with_no_base_file() { let url_resolver = UrlResolver::new(None); @@ -191,49 +222,125 @@ mod tests { ); } + fn url_from_test_fixtures(filename_relative_to_librsvg_srcdir: &str) -> Url { + let path = PathBuf::from(filename_relative_to_librsvg_srcdir); + let absolute = path + .canonicalize() + .expect("files from test fixtures are supposed to canonicalize"); + Url::from_file_path(absolute).unwrap() + } + #[test] fn allows_relative() { - let url_resolver = UrlResolver::new(Some( - Url::parse(&make_file_uri("/example/bar.svg")).unwrap(), - )); + let base_url = url_from_test_fixtures("tests/fixtures/loading/bar.svg"); + let url_resolver = UrlResolver::new(Some(base_url)); + let resolved = url_resolver.resolve_href("foo.svg").unwrap(); - let expected = make_file_uri("/example/foo.svg"); - assert_eq!(resolved.as_ref(), expected); + let resolved_str = resolved.as_str(); + assert!(resolved_str.ends_with("/loading/foo.svg")); } #[test] fn allows_sibling() { - let url_resolver = UrlResolver::new(Some( - Url::parse(&make_file_uri("/example/bar.svg")).unwrap(), - )); + let url_resolver = UrlResolver::new(Some(url_from_test_fixtures( + "tests/fixtures/loading/bar.svg", + ))); let resolved = url_resolver - .resolve_href(&make_file_uri("/example/foo.svg")) + .resolve_href(url_from_test_fixtures("tests/fixtures/loading/foo.svg").as_str()) .unwrap(); - let expected = make_file_uri("/example/foo.svg"); - assert_eq!(resolved.as_ref(), expected); + + let resolved_str = resolved.as_str(); + assert!(resolved_str.ends_with("/loading/foo.svg")); } #[test] fn allows_child_of_sibling() { - let url_resolver = UrlResolver::new(Some( - Url::parse(&make_file_uri("/example/bar.svg")).unwrap(), - )); + let url_resolver = UrlResolver::new(Some(url_from_test_fixtures( + "tests/fixtures/loading/bar.svg", + ))); let resolved = url_resolver - .resolve_href(&make_file_uri("/example/subdir/foo.svg")) + .resolve_href(url_from_test_fixtures("tests/fixtures/loading/subdir/baz.svg").as_str()) .unwrap(); - let expected = make_file_uri("/example/subdir/foo.svg"); - assert_eq!(resolved.as_ref(), expected); + + let resolved_str = resolved.as_str(); + assert!(resolved_str.ends_with("/loading/subdir/baz.svg")); } + // Ignore on Windows since we test for /etc/passwd + #[cfg(unix)] #[test] fn disallows_non_sibling() { + let url_resolver = UrlResolver::new(Some(url_from_test_fixtures( + "tests/fixtures/loading/bar.svg", + ))); + assert!(matches!( + url_resolver.resolve_href(&make_file_uri("/etc/passwd")), + Err(AllowedUrlError::NotSiblingOrChildOfBaseFile) + )); + } + + #[test] + fn disallows_queries() { let url_resolver = UrlResolver::new(Some( Url::parse(&make_file_uri("/example/bar.svg")).unwrap(), )); assert!(matches!( - url_resolver.resolve_href(&make_file_uri("/etc/passwd")), + url_resolver.resolve_href(".?../../../../../../../../../../etc/passwd"), + Err(AllowedUrlError::NoQueriesAllowed) + )); + } + + #[test] + fn disallows_weird_relative_uris() { + let url_resolver = UrlResolver::new(Some( + Url::parse(&make_file_uri("/example/bar.svg")).unwrap(), + )); + + assert!(url_resolver + .resolve_href(".@../../../../../../../../../../etc/passwd") + .is_err()); + assert!(url_resolver + .resolve_href(".$../../../../../../../../../../etc/passwd") + .is_err()); + assert!(url_resolver + .resolve_href(".%../../../../../../../../../../etc/passwd") + .is_err()); + assert!(url_resolver + .resolve_href(".*../../../../../../../../../../etc/passwd") + .is_err()); + assert!(url_resolver + .resolve_href("~/../../../../../../../../../../etc/passwd") + .is_err()); + } + + #[test] + fn disallows_dot_sibling() { + let url_resolver = UrlResolver::new(Some( + Url::parse(&make_file_uri("/example/bar.svg")).unwrap(), + )); + + assert!(matches!( + url_resolver.resolve_href("."), Err(AllowedUrlError::NotSiblingOrChildOfBaseFile) )); + assert!(matches!( + url_resolver.resolve_href(".#../../../../../../../../../../etc/passwd"), + Err(AllowedUrlError::NoFragmentIdentifierAllowed) + )); + } + + #[test] + fn disallows_fragment() { + // UrlResolver::resolve_href() explicitly disallows fragment identifiers. + // This is because they should have been stripped before calling that function, + // by NodeId or the Iri machinery. + let url_resolver = + UrlResolver::new(Some(Url::parse("https://example.com/foo.svg").unwrap())); + + assert!(matches!( + url_resolver.resolve_href("bar.svg#fragment"), + Err(AllowedUrlError::NoFragmentIdentifierAllowed) + )); } #[cfg(windows)] diff --git a/tests/Makefile.am b/tests/Makefile.am index 62ad4544a..6e5835a2c 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -10,6 +10,7 @@ test_sources = \ src/intrinsic_dimensions.rs \ src/legacy_sizing.rs \ src/loading_crash.rs \ + src/loading_disallowed.rs \ src/main.rs \ src/primitive_geometries.rs \ src/primitives.rs \ @@ -61,6 +62,7 @@ test_fixtures = \ $(wildcard $(srcdir)/fixtures/errors/*) \ $(wildcard $(srcdir)/fixtures/geometries/*) \ $(wildcard $(srcdir)/fixtures/loading/*) \ + $(wildcard $(srcdir)/fixtures/loading/subdir/*) \ $(wildcard $(srcdir)/fixtures/primitive_geometries/*) \ $(wildcard $(srcdir)/fixtures/reftests/*.css) \ $(wildcard $(srcdir)/fixtures/reftests/*.svg) \ diff --git a/tests/fixtures/loading/bar.svg b/tests/fixtures/loading/bar.svg new file mode 100644 index 000000000..304670099 --- /dev/null +++ b/tests/fixtures/loading/bar.svg @@ -0,0 +1 @@ + diff --git a/tests/fixtures/loading/disallowed-996-ref.svg b/tests/fixtures/loading/disallowed-996-ref.svg new file mode 100644 index 000000000..6d7e6750a --- /dev/null +++ b/tests/fixtures/loading/disallowed-996-ref.svg @@ -0,0 +1,10 @@ + + + + + + This text should appear + + diff --git a/tests/fixtures/loading/disallowed-996.svg b/tests/fixtures/loading/disallowed-996.svg new file mode 100644 index 000000000..a33acf8aa --- /dev/null +++ b/tests/fixtures/loading/disallowed-996.svg @@ -0,0 +1,12 @@ + + + + + + + This text should appear + + + diff --git a/tests/fixtures/loading/foo.svg b/tests/fixtures/loading/foo.svg new file mode 100644 index 000000000..304670099 --- /dev/null +++ b/tests/fixtures/loading/foo.svg @@ -0,0 +1 @@ + diff --git a/tests/fixtures/loading/subdir/baz.svg b/tests/fixtures/loading/subdir/baz.svg new file mode 100644 index 000000000..304670099 --- /dev/null +++ b/tests/fixtures/loading/subdir/baz.svg @@ -0,0 +1 @@ + diff --git a/tests/src/loading_disallowed.rs b/tests/src/loading_disallowed.rs new file mode 100644 index 000000000..595116440 --- /dev/null +++ b/tests/src/loading_disallowed.rs @@ -0,0 +1,7 @@ +use crate::test_svg_reference; + +test_svg_reference!( + bug_996_malicious_url, + "tests/fixtures/loading/disallowed-996.svg", + "tests/fixtures/loading/disallowed-996-ref.svg" +); diff --git a/tests/src/main.rs b/tests/src/main.rs index 467cbb47b..ea3ee338b 100644 --- a/tests/src/main.rs +++ b/tests/src/main.rs @@ -28,6 +28,9 @@ mod legacy_sizing; #[cfg(test)] mod loading_crash; +#[cfg(test)] +mod loading_disallowed; + #[cfg(test)] mod predicates; -- GitLab