upgrade version to 5.1.0

This commit is contained in:
dongyuzhen 2024-02-07 11:40:18 +08:00
parent eaa6321a96
commit ab7246f21f
7 changed files with 21 additions and 380 deletions

View File

@ -1,24 +0,0 @@
From d18f2f22218ea0e0b5327b5a2bda789afdf16e41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= <miro@hroncok.cz>
Date: Fri, 14 Jul 2023 12:18:25 +0200
Subject: [PATCH] Skip test_isoschematron.test_schematron_invalid_schema_empty
without the RNG file
The expected SchematronParseError only happens when validate_schema is true.
---
src/lxml/tests/test_isoschematron.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/lxml/tests/test_isoschematron.py b/src/lxml/tests/test_isoschematron.py
index 6d2aa3fb6..900f257c3 100644
--- a/src/lxml/tests/test_isoschematron.py
+++ b/src/lxml/tests/test_isoschematron.py
@@ -55,6 +55,8 @@ def test_schematron_empty_pattern(self):
schema = isoschematron.Schematron(schema)
self.assertTrue(schema)
+ @unittest.skipIf(not isoschematron.schematron_schema_valid_supported,
+ 'SchematronParseError is risen only when validate_schema is true')
def test_schematron_invalid_schema_empty(self):
schema = self.parse('''\
<schema xmlns="http://purl.oclc.org/dsdl/schematron" />

View File

@ -1,116 +0,0 @@
From a500f721e3b34018f0a86af275427663dc337b5a Mon Sep 17 00:00:00 2001
From: Stefan Behnel <stefan_ml@behnel.de>
Date: Wed, 12 Jul 2023 16:59:07 +0200
Subject: [PATCH] Make the validation of ISO-Schematron files optional in lxml,
depending on the availability of the RNG validation file. Some lxml
distributions discard the validation schema file due to licensing issues.
See https://bugs.launchpad.net/lxml/+bug/2024343
---
CHANGES.txt | 11 +++++++++++
doc/validation.txt | 9 +++++++++
src/lxml/isoschematron/__init__.py | 24 +++++++++++++++++++-----
3 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 24052db..e68ee9a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,17 @@
lxml changelog
==============
+4.9.3+
+======
+
+* LP#2024343: The validation of the schema file itself is now optional in the
+ ISO-Schematron implementation. This was done because some lxml distributions
+ discard the RNG validation schema file due to licensing issues. The validation
+ can now always be disabled with ``Schematron(..., validate_schema=False)``.
+ It is enabled by default if available and disabled otherwise. The module
+ constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used
+ to detect whether schema file validation is available.
+
4.9.3 (2023-07-05)
==================
diff --git a/doc/validation.txt b/doc/validation.txt
index af9d007..27c0ccd 100644
--- a/doc/validation.txt
+++ b/doc/validation.txt
@@ -615,6 +615,15 @@ The usage of validation phases is a unique feature of ISO-Schematron and can be
a very powerful tool e.g. for establishing validation stages or to provide
different validators for different "validation audiences".
+Note: Some lxml distributions exclude the validation schema file due to licensing issues.
+Since lxml 4.9.2-8, the validation of the user provided schema can be disabled with
+``Schematron(..., validate_schema=False)``.
+It is enabled by default if available and disabled otherwise. Previous versions of
+lxml always had it enabled and failed at import time if the file was not available.
+Thus, some distributions chose to remove the entire ISO-Schematron support.
+The module constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used
+since lxml 4.9.2-8 to detect whether schema file validation is available.
+
(Pre-ISO-Schematron)
--------------------
diff --git a/src/lxml/isoschematron/__init__.py b/src/lxml/isoschematron/__init__.py
index 5967b10..2846a66 100644
--- a/src/lxml/isoschematron/__init__.py
+++ b/src/lxml/isoschematron/__init__.py
@@ -61,10 +61,16 @@ iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse(
svrl_validation_errors = _etree.XPath(
'//svrl:failed-assert', namespaces={'svrl': SVRL_NS})
-
# RelaxNG validator for schematron schemas
-schematron_schema_valid = _etree.RelaxNG(
- file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
+schematron_schema_valid_supported = False
+try:
+ schematron_schema_valid = _etree.RelaxNG(
+ file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
+ schematron_schema_valid_supported = True
+except _etree.RelaxNGParseError:
+ # Some distributions delete the file due to licensing issues.
+ def schematron_schema_valid(arg):
+ raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng")
def stylesheet_params(**kwargs):
@@ -153,6 +159,13 @@ class Schematron(_etree._Validator):
report document gets stored and can be accessed as the ``validation_report``
property.
+ If ``validate_schema`` is set to False, the validation of the schema file
+ itself is disabled. Validation happens by default after building the full
+ schema, unless the schema validation file cannot be found at import time,
+ in which case the validation gets disabled. Some lxml distributions exclude
+ this file due to licensing issues. ISO-Schematron validation can then still
+ be used normally, but the schemas themselves cannot be validated.
+
Here is a usage example::
>>> from lxml import etree
@@ -234,7 +247,8 @@ class Schematron(_etree._Validator):
def __init__(self, etree=None, file=None, include=True, expand=True,
include_params={}, expand_params={}, compile_params={},
store_schematron=False, store_xslt=False, store_report=False,
- phase=None, error_finder=ASSERTS_ONLY):
+ phase=None, error_finder=ASSERTS_ONLY,
+ validate_schema=schematron_schema_valid_supported):
super(Schematron, self).__init__()
self._store_report = store_report
@@ -273,7 +287,7 @@ class Schematron(_etree._Validator):
schematron = self._include(schematron, **include_params)
if expand:
schematron = self._expand(schematron, **expand_params)
- if not schematron_schema_valid(schematron):
+ if validate_schema and not schematron_schema_valid(schematron):
raise _etree.SchematronParseError(
"invalid schematron schema: %s" %
schematron_schema_valid.error_log)
--
2.40.1

View File

@ -8,24 +8,24 @@ Subject: [PATCH] Skip failing test_iterparse_utf16_bom
1 file changed, 2 insertions(+)
diff --git a/src/lxml/tests/test_io.py b/src/lxml/tests/test_io.py
index cbdbcef..6349b90 100644
index 8fac41d..2b5d0de 100644
--- a/src/lxml/tests/test_io.py
+++ b/src/lxml/tests/test_io.py
@@ -7,6 +7,7 @@ IO test cases that apply to both etree and ElementTree
from __future__ import absolute_import
@@ -4,6 +4,7 @@ IO test cases that apply to both etree and ElementTree
import unittest
+from unittest import skip
import tempfile, gzip, os, os.path, gc, shutil
from .common_imports import (
@@ -304,6 +305,7 @@ class _IOTestCaseBase(HelperTestCase):
@@ -305,6 +306,7 @@ class _IOTestCaseBase(HelperTestCase):
os.unlink(f.name)
self.assertEqual(utext, root.text)
+ @skip
def test_iterparse_utf16_bom(self):
utext = _str('Søk på nettet')
utext = 'Søk på nettet'
uxml = '<?xml version="1.0" encoding="UTF-16"?><p>%s</p>' % utext
--
2.33.0

View File

@ -1,226 +0,0 @@
From 72f5a287a4016ecb405f2e8a4a03ae22a5b0b496 Mon Sep 17 00:00:00 2001
From: Stefan Behnel <stefan_ml@behnel.de>
Date: Wed, 5 Jul 2023 22:10:45 +0200
Subject: [PATCH] Change HTML "prefix" handling in ElementPath to let
"element.find('part1:part2')" search for "part1:part2" instead of just
"part2" with an unknown prefix. Also adapt the HTML "prefix" parsing test to
make it work in libxml2 2.10.4 and later, where HTML "prefixes" are kept as
part of the tag name by the parser.
---
src/lxml/_elementpath.py | 22 +++++++++++-----------
src/lxml/apihelpers.pxi | 7 +++++++
src/lxml/etree.pyx | 8 ++++----
src/lxml/includes/tree.pxd | 12 ++++++++++++
src/lxml/tests/test_etree.py | 26 ++++++++++++++++++++++----
5 files changed, 56 insertions(+), 19 deletions(-)
diff --git a/src/lxml/_elementpath.py b/src/lxml/_elementpath.py
index eabd81c..001b345 100644
--- a/src/lxml/_elementpath.py
+++ b/src/lxml/_elementpath.py
@@ -71,14 +71,14 @@ xpath_tokenizer_re = re.compile(
r"\s+"
)
-def xpath_tokenizer(pattern, namespaces=None):
+def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True):
# ElementTree uses '', lxml used None originally.
default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None
parsing_attribute = False
for token in xpath_tokenizer_re.findall(pattern):
ttype, tag = token
if tag and tag[0] != "{":
- if ":" in tag:
+ if ":" in tag and with_prefixes:
prefix, uri = tag.split(":", 1)
try:
if not namespaces:
@@ -251,7 +251,7 @@ ops = {
_cache = {}
-def _build_path_iterator(path, namespaces):
+def _build_path_iterator(path, namespaces, with_prefixes=True):
"""compile selector pattern"""
if path[-1:] == "/":
path += "*" # implicit all (FIXME: keep this?)
@@ -279,7 +279,7 @@ def _build_path_iterator(path, namespaces):
if path[:1] == "/":
raise SyntaxError("cannot use absolute path on element")
- stream = iter(xpath_tokenizer(path, namespaces))
+ stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes))
try:
_next = stream.next
except AttributeError:
@@ -308,8 +308,8 @@ def _build_path_iterator(path, namespaces):
##
# Iterate over the matching nodes
-def iterfind(elem, path, namespaces=None):
- selector = _build_path_iterator(path, namespaces)
+def iterfind(elem, path, namespaces=None, with_prefixes=True):
+ selector = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes)
result = iter((elem,))
for select in selector:
result = select(result)
@@ -319,8 +319,8 @@ def iterfind(elem, path, namespaces=None):
##
# Find first matching object.
-def find(elem, path, namespaces=None):
- it = iterfind(elem, path, namespaces)
+def find(elem, path, namespaces=None, with_prefixes=True):
+ it = iterfind(elem, path, namespaces, with_prefixes=with_prefixes)
try:
return next(it)
except StopIteration:
@@ -330,15 +330,15 @@ def find(elem, path, namespaces=None):
##
# Find all matching objects.
-def findall(elem, path, namespaces=None):
+def findall(elem, path, namespaces=None, with_prefixes=True):
return list(iterfind(elem, path, namespaces))
##
# Find text for first matching object.
-def findtext(elem, path, default=None, namespaces=None):
- el = find(elem, path, namespaces)
+def findtext(elem, path, default=None, namespaces=None, with_prefixes=True):
+ el = find(elem, path, namespaces, with_prefixes=with_prefixes)
if el is None:
return default
else:
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index 9fae9fb..35b3187 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -15,6 +15,13 @@ cdef void displayNode(xmlNode* c_node, indent):
finally:
return # swallow any exceptions
+cdef inline bint _isHtmlDocument(_Element element) except -1:
+ cdef xmlNode* c_node = element._c_node
+ return (
+ c_node is not NULL and c_node.doc is not NULL and
+ c_node.doc.properties & tree.XML_DOC_HTML != 0
+ )
+
cdef inline int _assertValidNode(_Element element) except -1:
assert element._c_node is not NULL, u"invalid Element proxy at %s" % id(element)
diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx
index c0d236b..9acea68 100644
--- a/src/lxml/etree.pyx
+++ b/src/lxml/etree.pyx
@@ -1547,7 +1547,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
"""
if isinstance(path, QName):
path = (<QName>path).text
- return _elementpath.find(self, path, namespaces)
+ return _elementpath.find(self, path, namespaces, with_prefixes=not _isHtmlDocument(self))
def findtext(self, path, default=None, namespaces=None):
u"""findtext(self, path, default=None, namespaces=None)
@@ -1560,7 +1560,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
"""
if isinstance(path, QName):
path = (<QName>path).text
- return _elementpath.findtext(self, path, default, namespaces)
+ return _elementpath.findtext(self, path, default, namespaces, with_prefixes=not _isHtmlDocument(self))
def findall(self, path, namespaces=None):
u"""findall(self, path, namespaces=None)
@@ -1573,7 +1573,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
"""
if isinstance(path, QName):
path = (<QName>path).text
- return _elementpath.findall(self, path, namespaces)
+ return _elementpath.findall(self, path, namespaces, with_prefixes=not _isHtmlDocument(self))
def iterfind(self, path, namespaces=None):
u"""iterfind(self, path, namespaces=None)
@@ -1586,7 +1586,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
"""
if isinstance(path, QName):
path = (<QName>path).text
- return _elementpath.iterfind(self, path, namespaces)
+ return _elementpath.iterfind(self, path, namespaces, with_prefixes=not _isHtmlDocument(self))
def xpath(self, _path, *, namespaces=None, extensions=None,
smart_strings=True, **_variables):
diff --git a/src/lxml/includes/tree.pxd b/src/lxml/includes/tree.pxd
index 010af80..d709313 100644
--- a/src/lxml/includes/tree.pxd
+++ b/src/lxml/includes/tree.pxd
@@ -154,6 +154,17 @@ cdef extern from "libxml/tree.h":
XML_EXTERNAL_PARAMETER_ENTITY= 5
XML_INTERNAL_PREDEFINED_ENTITY= 6
+ ctypedef enum xmlDocProperties:
+ XML_DOC_WELLFORMED = 1 # /* document is XML well formed */
+ XML_DOC_NSVALID = 2 # /* document is Namespace valid */
+ XML_DOC_OLD10 = 4 # /* parsed with old XML-1.0 parser */
+ XML_DOC_DTDVALID = 8 # /* DTD validation was successful */
+ XML_DOC_XINCLUDE = 16 # /* XInclude substitution was done */
+ XML_DOC_USERBUILT = 32 # /* Document was built using the API
+ # and not by parsing an instance */
+ XML_DOC_INTERNAL = 64 # /* built for internal processing */
+ XML_DOC_HTML = 128 # /* parsed or built HTML document */
+
ctypedef struct xmlNs:
const_xmlChar* href
const_xmlChar* prefix
@@ -274,6 +285,7 @@ cdef extern from "libxml/tree.h":
void* _private
xmlDtd* intSubset
xmlDtd* extSubset
+ int properties
ctypedef struct xmlAttr:
void* _private
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index 0339796..80a12a4 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -3069,11 +3069,29 @@ class ETreeOnlyTestCase(HelperTestCase):
def test_html_prefix_nsmap(self):
etree = self.etree
- el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
- if etree.LIBXML_VERSION < (2, 9, 11):
- self.assertEqual({'hha': None}, el.nsmap)
+ el = etree.HTML('<hha:page-description>aa</hha:page-description>')
+ pd = el[-1]
+ while len(pd):
+ pd = pd[-1]
+
+ if etree.LIBXML_VERSION >= (2, 10, 4):
+ # "Prefix" is kept as part of the tag name.
+ self.assertEqual("hha:page-description", pd.tag)
+ self.assertIsNone(el.find('.//page-description'))
+ self.assertIsNotNone(el.find('.//hha:page-description')) # no namespaces!
+ for e in el.iter():
+ self.assertEqual({}, e.nsmap)
+ elif etree.LIBXML_VERSION >= (2, 9, 11):
+ # "Prefix" is stripped.
+ self.assertEqual("page-description", pd.tag)
+ self.assertIsNotNone(el.find('.//page-description'))
+ for e in el.iter():
+ self.assertEqual({}, e.nsmap)
else:
- self.assertEqual({}, el.nsmap)
+ # "Prefix" is parsed as XML prefix.
+ self.assertEqual("page-description", pd.tag)
+ pd = el.find('.//page-description')
+ self.assertEqual({'hha': None}, pd.nsmap)
def test_getchildren(self):
Element = self.etree.Element
--
2.33.0

Binary file not shown.

BIN
lxml-5.1.0.tar.gz Normal file

Binary file not shown.

View File

@ -6,18 +6,14 @@ the simplicity of a native Python API, mostly compatible but superior to the wel
The latest release works with CPython 3.6 and later.
Name: python-lxml
Version: 4.9.3
Release: 2
Version: 5.1.0
Release: 1
Summary: XML processing library combining libxml2/libxslt with the ElementTree API
License: BSD
URL: https://github.com/lxml/lxml
Source0: https://files.pythonhosted.org/packages/30/39/7305428d1c4f28282a4f5bdbef24e0f905d351f34cf351ceb131f5cddf78/lxml-4.9.3.tar.gz
Source0: https://files.pythonhosted.org/packages/2b/b4/bbccb250adbee490553b6a52712c46c20ea1ba533a643f1424b27ffc6845/lxml-5.1.0.tar.gz
Patch0: Make-the-validation-of-ISO-Schematron-files-optional.patch
Patch1: 380.patch
Patch2: Skip-failing-test_iterparse_utf16_bom.patch
Patch6000: backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch
Patch0: Skip-failing-test_iterparse_utf16_bom.patch
%description
%{_description}
@ -71,7 +67,7 @@ mv %{buildroot}/filelist.lst .
mv %{buildroot}/doclist.lst .
%check
make test3
make test
%files -n python3-lxml -f filelist.lst
%license doc/licenses/*.txt LICENSES.txt
@ -81,6 +77,17 @@ make test3
%doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt
%changelog
* Wed Feb 07 2024 dongyuzhen <dongyuzhen@h-partners.com> - 5.1.0-1
- upgrade version to 5.1.0:
- some incorrect declarations were removed from ``python.pxd``
- built with Cython 3.0.7
- some redundant and long deprecated methods were removed
- character escaping in ``C14N2`` serialisation now uses a single pass over the text instead of searching for each unescaped character separately
- early support for Python 3.13a2 was added
- support for Python 2.7 and Python versions < 3.6 was removed
- parsing ASCII strings is slightly faster
- some bug fixes
* Wed Aug 09 2023 zhuofeng <zhuofeng2@huawei.com> - 4.9.3-2
- sync fedora patch