From 9c87439d9afa14a365ff13e73adc809cb2c3d97b Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 23 Nov 2020 00:47:02 -0500 Subject: [PATCH] feat: XML::Schema and RelaxNG creation accept optional ParseOptions I'm trying out a new pattern, which is that the parsed object carries around the ParseOptions it was created with, which should make some testing a bit easier. I'm also not implementing the "config block" pattern in use for Documents, because I think the UX is weird and I'm hoping to change everything to use kwargs in a 2.0 release, anyway. --- ext/nokogiri/xml_relax_ng.c | 39 ++++++++++++++++++-------- ext/nokogiri/xml_schema.c | 46 +++++++++++++++++++++++-------- lib/nokogiri/xml/parse_options.rb | 2 ++ lib/nokogiri/xml/relax_ng.rb | 4 +-- lib/nokogiri/xml/schema.rb | 10 ++++--- 5 files changed, 72 insertions(+), 29 deletions(-) diff --git a/ext/nokogiri/xml_relax_ng.c b/ext/nokogiri/xml_relax_ng.c index e17b11a..f361d27 100644 --- a/ext/nokogiri/xml_relax_ng.c +++ b/ext/nokogiri/xml_relax_ng.c @@ -53,16 +53,24 @@ static VALUE validate_document(VALUE self, VALUE document) * * Create a new RelaxNG from the contents of +string+ */ -static VALUE read_memory(VALUE klass, VALUE content) +static VALUE read_memory(int argc, VALUE *argv, VALUE klass) { - xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt( - (const char *)StringValuePtr(content), - (int)RSTRING_LEN(content) - ); + VALUE content; + VALUE parse_options; + xmlRelaxNGParserCtxtPtr ctx; xmlRelaxNGPtr schema; - VALUE errors = rb_ary_new(); + VALUE errors; VALUE rb_schema; + int scanned_args = 0; + + scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options); + if (scanned_args == 1) { + parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA")); + } + ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content)); + + errors = rb_ary_new(); xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher); #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS @@ -90,6 +98,7 @@ static VALUE read_memory(VALUE klass, VALUE content) rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema); rb_iv_set(rb_schema, "@errors", errors); + rb_iv_set(rb_schema, "@parse_options", parse_options); return rb_schema; } @@ -100,18 +109,25 @@ static VALUE read_memory(VALUE klass, VALUE content) * * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+ */ -static VALUE from_document(VALUE klass, VALUE document) +static VALUE from_document(int argc, VALUE *argv, VALUE klass) { + VALUE document; + VALUE parse_options; xmlDocPtr doc; xmlRelaxNGParserCtxtPtr ctx; xmlRelaxNGPtr schema; VALUE errors; VALUE rb_schema; + int scanned_args = 0; + + scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options); Data_Get_Struct(document, xmlDoc, doc); + doc = doc->doc; /* In case someone passes us a node. ugh. */ - /* In case someone passes us a node. ugh. */ - doc = doc->doc; + if (scanned_args == 1) { + parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA")); + } ctx = xmlRelaxNGNewDocParserCtxt(doc); @@ -142,6 +158,7 @@ static VALUE from_document(VALUE klass, VALUE document) rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema); rb_iv_set(rb_schema, "@errors", errors); + rb_iv_set(rb_schema, "@parse_options", parse_options); return rb_schema; } @@ -155,7 +172,7 @@ void init_xml_relax_ng() cNokogiriXmlRelaxNG = klass; - rb_define_singleton_method(klass, "read_memory", read_memory, 1); - rb_define_singleton_method(klass, "from_document", from_document, 1); + rb_define_singleton_method(klass, "read_memory", read_memory, -1); + rb_define_singleton_method(klass, "from_document", from_document, -1); rb_define_private_method(klass, "validate_document", validate_document, 1); } diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c index 439f721..ea7c3d3 100644 --- a/ext/nokogiri/xml_schema.c +++ b/ext/nokogiri/xml_schema.c @@ -93,15 +93,26 @@ static VALUE validate_file(VALUE self, VALUE rb_filename) * * Create a new Schema from the contents of +string+ */ -static VALUE read_memory(VALUE klass, VALUE content) +static VALUE read_memory(int argc, VALUE *argv, VALUE klass) { + VALUE content; + VALUE parse_options; + int parse_options_int; + xmlSchemaParserCtxtPtr ctx; xmlSchemaPtr schema; - xmlSchemaParserCtxtPtr ctx = xmlSchemaNewMemParserCtxt( - (const char *)StringValuePtr(content), - (int)RSTRING_LEN(content) - ); + VALUE errors; VALUE rb_schema; - VALUE errors = rb_ary_new(); + int scanned_args = 0; + + scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options); + if (scanned_args == 1) { + parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA")); + } + parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0)); + + ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content)); + + errors = rb_ary_new(); xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher); #ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS @@ -109,7 +120,7 @@ static VALUE read_memory(VALUE klass, VALUE content) ctx, Nokogiri_error_array_pusher, (void *)errors - ); + ); #endif schema = xmlSchemaParse(ctx); @@ -129,6 +140,7 @@ static VALUE read_memory(VALUE klass, VALUE content) rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema); rb_iv_set(rb_schema, "@errors", errors); + rb_iv_set(rb_schema, "@parse_options", parse_options); return rb_schema; } @@ -164,18 +176,27 @@ static int has_blank_nodes_p(VALUE cache) * * Create a new Schema from the Nokogiri::XML::Document +doc+ */ -static VALUE from_document(VALUE klass, VALUE document) +static VALUE from_document(int argc, VALUE *argv, VALUE klass) { + VALUE document; + VALUE parse_options; + int parse_options_int; xmlDocPtr doc; xmlSchemaParserCtxtPtr ctx; xmlSchemaPtr schema; VALUE errors; VALUE rb_schema; + int scanned_args = 0; + + scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options); Data_Get_Struct(document, xmlDoc, doc); + doc = doc->doc; /* In case someone passes us a node. ugh. */ - /* In case someone passes us a node. ugh. */ - doc = doc->doc; + if (scanned_args == 1) { + parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA")); + } + parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0)); if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) { rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous"); @@ -211,6 +232,7 @@ static VALUE from_document(VALUE klass, VALUE document) rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema); rb_iv_set(rb_schema, "@errors", errors); + rb_iv_set(rb_schema, "@parse_options", parse_options); return rb_schema; @@ -226,8 +248,8 @@ void init_xml_schema() cNokogiriXmlSchema = klass; - rb_define_singleton_method(klass, "read_memory", read_memory, 1); - rb_define_singleton_method(klass, "from_document", from_document, 1); + rb_define_singleton_method(klass, "read_memory", read_memory, -1); + rb_define_singleton_method(klass, "from_document", from_document, -1); rb_define_private_method(klass, "validate_document", validate_document, 1); rb_define_private_method(klass, "validate_file", validate_file, 1); diff --git a/lib/nokogiri/xml/parse_options.rb b/lib/nokogiri/xml/parse_options.rb index 8969578..c6d3d1c 100644 --- a/lib/nokogiri/xml/parse_options.rb +++ b/lib/nokogiri/xml/parse_options.rb @@ -72,6 +72,8 @@ module Nokogiri DEFAULT_XML = RECOVER | NONET # the default options used for parsing HTML documents DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET + # the default options used for parsing XML schemas + DEFAULT_SCHEMA = NONET attr_accessor :options def initialize options = STRICT diff --git a/lib/nokogiri/xml/relax_ng.rb b/lib/nokogiri/xml/relax_ng.rb index 5a645a4..79bc30c 100644 --- a/lib/nokogiri/xml/relax_ng.rb +++ b/lib/nokogiri/xml/relax_ng.rb @@ -4,8 +4,8 @@ module Nokogiri ### # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+. # See Nokogiri::XML::RelaxNG for an example. - def RelaxNG string_or_io - RelaxNG.new(string_or_io) + def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA) + RelaxNG.new(string_or_io, options) end end diff --git a/lib/nokogiri/xml/schema.rb b/lib/nokogiri/xml/schema.rb index 65a7bcd..a88f69c 100644 --- a/lib/nokogiri/xml/schema.rb +++ b/lib/nokogiri/xml/schema.rb @@ -4,8 +4,8 @@ module Nokogiri ### # Create a new Nokogiri::XML::Schema object using a +string_or_io+ # object. - def Schema string_or_io - Schema.new(string_or_io) + def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA) + Schema.new(string_or_io, options) end end @@ -29,12 +29,14 @@ module Nokogiri class Schema # Errors while parsing the schema file attr_accessor :errors + # The Nokogiri::XML::ParseOptions used to parse the schema + attr_accessor :parse_options ### # Create a new Nokogiri::XML::Schema object using a +string_or_io+ # object. - def self.new string_or_io - from_document Nokogiri::XML(string_or_io) + def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA + from_document(Nokogiri::XML(string_or_io), options) end ### -- 2.23.0