From 74abb4f2e73bb61b17d9f1a0ad717c881943b877 Mon Sep 17 00:00:00 2001
From: Aaron Patterson
Date: Wed, 26 Feb 2020 13:51:43 -0800
Subject: [PATCH] Work around a bug in libxml2

This commit works around a bug in libxml2 where parsing schemas can
result in dangling pointers which can lead to a segv.

Upstream bug is here:

  https://gitlab.gnome.org/GNOME/libxml2/issues/148

Fixes #1985
---
 ext/nokogiri/xml_schema.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c
index da2774b..439f721 100644
--- a/ext/nokogiri/xml_schema.c
+++ b/ext/nokogiri/xml_schema.c
@@ -133,6 +133,31 @@ static VALUE read_memory(VALUE klass, VALUE content)
   return rb_schema;
 }
 
+/* Schema creation will remove and deallocate "blank" nodes.  If those nodes
+ * have been exposed to Ruby, they could get freed out from under the VALUE
+ * pointer.  Returns 1 if any node in `cache` (an array of wrapped nodes, or
+ * nil) is blank per xmlIsBlankNode(), else 0; from_document raises on 1.
+ */
+static int has_blank_nodes_p(VALUE cache)
+{
+  long i;
+
+  if (NIL_P(cache)) {
+    return 0;
+  }
+
+  for (i = 0; i < RARRAY_LEN(cache); i++) {
+    xmlNodePtr node;
+    VALUE element = rb_ary_entry(cache, i);
+    Data_Get_Struct(element, xmlNode, node);
+    if (xmlIsBlankNode(node)) {
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
 /*
  * call-seq:
  *  from_document(doc)
@@ -152,6 +177,10 @@ static VALUE from_document(VALUE klass, VALUE document)
   /* In case someone passes us a node. ugh. */
   doc = doc->doc;
 
+  if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
+    rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
+  }
+
   ctx = xmlSchemaNewDocParserCtxt(doc);
 
   errors = rb_ary_new();
-- 
2.23.0