279 lines
9.7 KiB
Diff
279 lines
9.7 KiB
Diff
|
|
From 9c87439d9afa14a365ff13e73adc809cb2c3d97b Mon Sep 17 00:00:00 2001
|
||
|
|
From: Mike Dalessio <mike.dalessio@gmail.com>
|
||
|
|
Date: Mon, 23 Nov 2020 00:47:02 -0500
|
||
|
|
Subject: [PATCH] feat: XML::Schema and RelaxNG creation accept optional
|
||
|
|
ParseOptions
|
||
|
|
|
||
|
|
I'm trying out a new pattern: the parsed object carries
|
||
|
|
around the ParseOptions it was created with, which should make some
|
||
|
|
testing a bit easier.
|
||
|
|
|
||
|
|
I'm also not implementing the "config block" pattern in use for
|
||
|
|
Documents, because I think the UX is weird and I'm hoping to change
|
||
|
|
everything to use kwargs in a 2.0 release, anyway.
|
||
|
|
---
|
||
|
|
ext/nokogiri/xml_relax_ng.c | 39 ++++++++++++++++++--------
|
||
|
|
ext/nokogiri/xml_schema.c | 46 +++++++++++++++++++++++--------
|
||
|
|
lib/nokogiri/xml/parse_options.rb | 2 ++
|
||
|
|
lib/nokogiri/xml/relax_ng.rb | 4 +--
|
||
|
|
lib/nokogiri/xml/schema.rb | 10 ++++---
|
||
|
|
5 files changed, 72 insertions(+), 29 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/ext/nokogiri/xml_relax_ng.c b/ext/nokogiri/xml_relax_ng.c
|
||
|
|
index e17b11a..f361d27 100644
|
||
|
|
--- a/ext/nokogiri/xml_relax_ng.c
|
||
|
|
+++ b/ext/nokogiri/xml_relax_ng.c
|
||
|
|
@@ -53,16 +53,24 @@ static VALUE validate_document(VALUE self, VALUE document)
|
||
|
|
*
|
||
|
|
* Create a new RelaxNG from the contents of +string+
|
||
|
|
*/
|
||
|
|
-static VALUE read_memory(VALUE klass, VALUE content)
|
||
|
|
+static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
|
||
|
|
{
|
||
|
|
- xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
|
||
|
|
- (const char *)StringValuePtr(content),
|
||
|
|
- (int)RSTRING_LEN(content)
|
||
|
|
- );
|
||
|
|
+ VALUE content;
|
||
|
|
+ VALUE parse_options;
|
||
|
|
+ xmlRelaxNGParserCtxtPtr ctx;
|
||
|
|
xmlRelaxNGPtr schema;
|
||
|
|
- VALUE errors = rb_ary_new();
|
||
|
|
+ VALUE errors;
|
||
|
|
VALUE rb_schema;
|
||
|
|
+ int scanned_args = 0;
|
||
|
|
+
|
||
|
|
+ scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
|
||
|
|
+ if (scanned_args == 1) {
|
||
|
|
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
||
|
|
+ }
|
||
|
|
|
||
|
|
+ ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
|
||
|
|
+
|
||
|
|
+ errors = rb_ary_new();
|
||
|
|
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
||
|
|
|
||
|
|
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
||
|
|
@@ -90,6 +98,7 @@ static VALUE read_memory(VALUE klass, VALUE content)
|
||
|
|
|
||
|
|
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
||
|
|
rb_iv_set(rb_schema, "@errors", errors);
|
||
|
|
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
|
||
|
|
|
||
|
|
return rb_schema;
|
||
|
|
}
|
||
|
|
@@ -100,18 +109,25 @@ static VALUE read_memory(VALUE klass, VALUE content)
|
||
|
|
*
|
||
|
|
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
|
||
|
|
*/
|
||
|
|
-static VALUE from_document(VALUE klass, VALUE document)
|
||
|
|
+static VALUE from_document(int argc, VALUE *argv, VALUE klass)
|
||
|
|
{
|
||
|
|
+ VALUE document;
|
||
|
|
+ VALUE parse_options;
|
||
|
|
xmlDocPtr doc;
|
||
|
|
xmlRelaxNGParserCtxtPtr ctx;
|
||
|
|
xmlRelaxNGPtr schema;
|
||
|
|
VALUE errors;
|
||
|
|
VALUE rb_schema;
|
||
|
|
+ int scanned_args = 0;
|
||
|
|
+
|
||
|
|
+ scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
|
||
|
|
|
||
|
|
Data_Get_Struct(document, xmlDoc, doc);
|
||
|
|
+ doc = doc->doc; /* In case someone passes us a node. ugh. */
|
||
|
|
|
||
|
|
- /* In case someone passes us a node. ugh. */
|
||
|
|
- doc = doc->doc;
|
||
|
|
+ if (scanned_args == 1) {
|
||
|
|
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
||
|
|
+ }
|
||
|
|
|
||
|
|
ctx = xmlRelaxNGNewDocParserCtxt(doc);
|
||
|
|
|
||
|
|
@@ -142,6 +158,7 @@ static VALUE from_document(VALUE klass, VALUE document)
|
||
|
|
|
||
|
|
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
||
|
|
rb_iv_set(rb_schema, "@errors", errors);
|
||
|
|
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
|
||
|
|
|
||
|
|
return rb_schema;
|
||
|
|
}
|
||
|
|
@@ -155,7 +172,7 @@ void init_xml_relax_ng()
|
||
|
|
|
||
|
|
cNokogiriXmlRelaxNG = klass;
|
||
|
|
|
||
|
|
- rb_define_singleton_method(klass, "read_memory", read_memory, 1);
|
||
|
|
- rb_define_singleton_method(klass, "from_document", from_document, 1);
|
||
|
|
+ rb_define_singleton_method(klass, "read_memory", read_memory, -1);
|
||
|
|
+ rb_define_singleton_method(klass, "from_document", from_document, -1);
|
||
|
|
rb_define_private_method(klass, "validate_document", validate_document, 1);
|
||
|
|
}
|
||
|
|
diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c
|
||
|
|
index 439f721..ea7c3d3 100644
|
||
|
|
--- a/ext/nokogiri/xml_schema.c
|
||
|
|
+++ b/ext/nokogiri/xml_schema.c
|
||
|
|
@@ -93,15 +93,26 @@ static VALUE validate_file(VALUE self, VALUE rb_filename)
|
||
|
|
*
|
||
|
|
* Create a new Schema from the contents of +string+
|
||
|
|
*/
|
||
|
|
-static VALUE read_memory(VALUE klass, VALUE content)
|
||
|
|
+static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
|
||
|
|
{
|
||
|
|
+ VALUE content;
|
||
|
|
+ VALUE parse_options;
|
||
|
|
+ int parse_options_int;
|
||
|
|
+ xmlSchemaParserCtxtPtr ctx;
|
||
|
|
xmlSchemaPtr schema;
|
||
|
|
- xmlSchemaParserCtxtPtr ctx = xmlSchemaNewMemParserCtxt(
|
||
|
|
- (const char *)StringValuePtr(content),
|
||
|
|
- (int)RSTRING_LEN(content)
|
||
|
|
- );
|
||
|
|
+ VALUE errors;
|
||
|
|
VALUE rb_schema;
|
||
|
|
- VALUE errors = rb_ary_new();
|
||
|
|
+ int scanned_args = 0;
|
||
|
|
+
|
||
|
|
+ scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
|
||
|
|
+ if (scanned_args == 1) {
|
||
|
|
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
||
|
|
+ }
|
||
|
|
+ parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
|
||
|
|
+
|
||
|
|
+ ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
|
||
|
|
+
|
||
|
|
+ errors = rb_ary_new();
|
||
|
|
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
||
|
|
|
||
|
|
#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
|
||
|
|
@@ -109,7 +120,7 @@ static VALUE read_memory(VALUE klass, VALUE content)
|
||
|
|
ctx,
|
||
|
|
Nokogiri_error_array_pusher,
|
||
|
|
(void *)errors
|
||
|
|
- );
|
||
|
|
+ );
|
||
|
|
#endif
|
||
|
|
|
||
|
|
schema = xmlSchemaParse(ctx);
|
||
|
|
@@ -129,6 +140,7 @@ static VALUE read_memory(VALUE klass, VALUE content)
|
||
|
|
|
||
|
|
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
||
|
|
rb_iv_set(rb_schema, "@errors", errors);
|
||
|
|
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
|
||
|
|
|
||
|
|
return rb_schema;
|
||
|
|
}
|
||
|
|
@@ -164,18 +176,27 @@ static int has_blank_nodes_p(VALUE cache)
|
||
|
|
*
|
||
|
|
* Create a new Schema from the Nokogiri::XML::Document +doc+
|
||
|
|
*/
|
||
|
|
-static VALUE from_document(VALUE klass, VALUE document)
|
||
|
|
+static VALUE from_document(int argc, VALUE *argv, VALUE klass)
|
||
|
|
{
|
||
|
|
+ VALUE document;
|
||
|
|
+ VALUE parse_options;
|
||
|
|
+ int parse_options_int;
|
||
|
|
xmlDocPtr doc;
|
||
|
|
xmlSchemaParserCtxtPtr ctx;
|
||
|
|
xmlSchemaPtr schema;
|
||
|
|
VALUE errors;
|
||
|
|
VALUE rb_schema;
|
||
|
|
+ int scanned_args = 0;
|
||
|
|
+
|
||
|
|
+ scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
|
||
|
|
|
||
|
|
Data_Get_Struct(document, xmlDoc, doc);
|
||
|
|
+ doc = doc->doc; /* In case someone passes us a node. ugh. */
|
||
|
|
|
||
|
|
- /* In case someone passes us a node. ugh. */
|
||
|
|
- doc = doc->doc;
|
||
|
|
+ if (scanned_args == 1) {
|
||
|
|
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
||
|
|
+ }
|
||
|
|
+ parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
|
||
|
|
|
||
|
|
if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
|
||
|
|
rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
|
||
|
|
@@ -211,6 +232,7 @@ static VALUE from_document(VALUE klass, VALUE document)
|
||
|
|
|
||
|
|
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
||
|
|
rb_iv_set(rb_schema, "@errors", errors);
|
||
|
|
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
|
||
|
|
|
||
|
|
return rb_schema;
|
||
|
|
|
||
|
|
@@ -226,8 +248,8 @@ void init_xml_schema()
|
||
|
|
|
||
|
|
cNokogiriXmlSchema = klass;
|
||
|
|
|
||
|
|
- rb_define_singleton_method(klass, "read_memory", read_memory, 1);
|
||
|
|
- rb_define_singleton_method(klass, "from_document", from_document, 1);
|
||
|
|
+ rb_define_singleton_method(klass, "read_memory", read_memory, -1);
|
||
|
|
+ rb_define_singleton_method(klass, "from_document", from_document, -1);
|
||
|
|
|
||
|
|
rb_define_private_method(klass, "validate_document", validate_document, 1);
|
||
|
|
rb_define_private_method(klass, "validate_file", validate_file, 1);
|
||
|
|
diff --git a/lib/nokogiri/xml/parse_options.rb b/lib/nokogiri/xml/parse_options.rb
|
||
|
|
index 8969578..c6d3d1c 100644
|
||
|
|
--- a/lib/nokogiri/xml/parse_options.rb
|
||
|
|
+++ b/lib/nokogiri/xml/parse_options.rb
|
||
|
|
@@ -72,6 +72,8 @@ module Nokogiri
|
||
|
|
DEFAULT_XML = RECOVER | NONET
|
||
|
|
# the default options used for parsing HTML documents
|
||
|
|
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
|
||
|
|
+ # the default options used for parsing XML schemas
|
||
|
|
+ DEFAULT_SCHEMA = NONET
|
||
|
|
|
||
|
|
attr_accessor :options
|
||
|
|
def initialize options = STRICT
|
||
|
|
diff --git a/lib/nokogiri/xml/relax_ng.rb b/lib/nokogiri/xml/relax_ng.rb
|
||
|
|
index 5a645a4..79bc30c 100644
|
||
|
|
--- a/lib/nokogiri/xml/relax_ng.rb
|
||
|
|
+++ b/lib/nokogiri/xml/relax_ng.rb
|
||
|
|
@@ -4,8 +4,8 @@ module Nokogiri
|
||
|
|
###
|
||
|
|
# Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
|
||
|
|
# See Nokogiri::XML::RelaxNG for an example.
|
||
|
|
- def RelaxNG string_or_io
|
||
|
|
- RelaxNG.new(string_or_io)
|
||
|
|
+ def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
||
|
|
+ RelaxNG.new(string_or_io, options)
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
diff --git a/lib/nokogiri/xml/schema.rb b/lib/nokogiri/xml/schema.rb
|
||
|
|
index 65a7bcd..a88f69c 100644
|
||
|
|
--- a/lib/nokogiri/xml/schema.rb
|
||
|
|
+++ b/lib/nokogiri/xml/schema.rb
|
||
|
|
@@ -4,8 +4,8 @@ module Nokogiri
|
||
|
|
###
|
||
|
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
||
|
|
# object.
|
||
|
|
- def Schema string_or_io
|
||
|
|
- Schema.new(string_or_io)
|
||
|
|
+ def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
||
|
|
+ Schema.new(string_or_io, options)
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
@@ -29,12 +29,14 @@ module Nokogiri
|
||
|
|
class Schema
|
||
|
|
# Errors while parsing the schema file
|
||
|
|
attr_accessor :errors
|
||
|
|
+ # The Nokogiri::XML::ParseOptions used to parse the schema
|
||
|
|
+ attr_accessor :parse_options
|
||
|
|
|
||
|
|
###
|
||
|
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
||
|
|
# object.
|
||
|
|
- def self.new string_or_io
|
||
|
|
- from_document Nokogiri::XML(string_or_io)
|
||
|
|
+ def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
|
||
|
|
+ from_document(Nokogiri::XML(string_or_io), options)
|
||
|
|
end
|
||
|
|
|
||
|
|
###
|
||
|
|
--
|
||
|
|
2.23.0
|
||
|
|
|