upgrade version to 5.1.0

2024-02-07 11:40:18 +08:00 · 2024-02-07 11:40:18 +08:00 · ab7246f21f
commit ab7246f21f
parent eaa6321a96
7 changed files with 21 additions and 380 deletions
--- a/380.patch
+++ b/380.patch
@ -1,24 +0,0 @@
-From d18f2f22218ea0e0b5327b5a2bda789afdf16e41 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= <miro@hroncok.cz>
-Date: Fri, 14 Jul 2023 12:18:25 +0200
-Subject: [PATCH] Skip test_isoschematron.test_schematron_invalid_schema_empty
- without the RNG file
-
-The expected SchematronParseError only happens when validate_schema is true.
---
- src/lxml/tests/test_isoschematron.py | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/src/lxml/tests/test_isoschematron.py b/src/lxml/tests/test_isoschematron.py
-index 6d2aa3fb6..900f257c3 100644
--- a/src/lxml/tests/test_isoschematron.py
-+++ b/src/lxml/tests/test_isoschematron.py
-@@ -55,6 +55,8 @@ def test_schematron_empty_pattern(self):
-         schema = isoschematron.Schematron(schema)
-         self.assertTrue(schema)
- 
-+    @unittest.skipIf(not isoschematron.schematron_schema_valid_supported,
-+                     'SchematronParseError is risen only when validate_schema is true')
-     def test_schematron_invalid_schema_empty(self):
-         schema = self.parse('''\
- <schema xmlns="http://purl.oclc.org/dsdl/schematron" />
--- a/Make-the-validation-of-ISO-Schematron-files-optional.patch
+++ b/Make-the-validation-of-ISO-Schematron-files-optional.patch
@ -1,116 +0,0 @@
-From a500f721e3b34018f0a86af275427663dc337b5a Mon Sep 17 00:00:00 2001
-From: Stefan Behnel <stefan_ml@behnel.de>
-Date: Wed, 12 Jul 2023 16:59:07 +0200
-Subject: [PATCH] Make the validation of ISO-Schematron files optional in lxml,
- depending on the availability of the RNG validation file. Some lxml
- distributions discard the validation schema file due to licensing issues.
-
-See https://bugs.launchpad.net/lxml/+bug/2024343
---
- CHANGES.txt                        | 11 +++++++++++
- doc/validation.txt                 |  9 +++++++++
- src/lxml/isoschematron/__init__.py | 24 +++++++++++++++++++-----
- 3 files changed, 39 insertions(+), 5 deletions(-)
-
-diff --git a/CHANGES.txt b/CHANGES.txt
-index 24052db..e68ee9a 100644
--- a/CHANGES.txt
-+++ b/CHANGES.txt
-@@ -2,6 +2,17 @@
- lxml changelog
- ==============
- 
-+4.9.3+
-+======
-+
-+* LP#2024343: The validation of the schema file itself is now optional in the
-+  ISO-Schematron implementation.  This was done because some lxml distributions
-+  discard the RNG validation schema file due to licensing issues.  The validation
-+  can now always be disabled with ``Schematron(..., validate_schema=False)``.
-+  It is enabled by default if available and disabled otherwise.  The module
-+  constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used
-+  to detect whether schema file validation is available.
-+
- 4.9.3 (2023-07-05)
- ==================
- 
-diff --git a/doc/validation.txt b/doc/validation.txt
-index af9d007..27c0ccd 100644
--- a/doc/validation.txt
-+++ b/doc/validation.txt
-@@ -615,6 +615,15 @@ The usage of validation phases is a unique feature of ISO-Schematron and can be
- a very powerful tool e.g. for establishing validation stages or to provide 
- different validators for different "validation audiences".
- 
-+Note: Some lxml distributions exclude the validation schema file due to licensing issues.
-+Since lxml 4.9.2-8, the validation of the user provided schema can be disabled with
-+``Schematron(..., validate_schema=False)``.
-+It is enabled by default if available and disabled otherwise.  Previous versions of
-+lxml always had it enabled and failed at import time if the file was not available.
-+Thus, some distributions chose to remove the entire ISO-Schematron support.
-+The module constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used
-+since lxml 4.9.2-8 to detect whether schema file validation is available.
-+
- (Pre-ISO-Schematron)
- --------------------
- 
-diff --git a/src/lxml/isoschematron/__init__.py b/src/lxml/isoschematron/__init__.py
-index 5967b10..2846a66 100644
--- a/src/lxml/isoschematron/__init__.py
-+++ b/src/lxml/isoschematron/__init__.py
-@@ -61,10 +61,16 @@ iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse(
- svrl_validation_errors = _etree.XPath(
-     '//svrl:failed-assert', namespaces={'svrl': SVRL_NS})
- 
-
- # RelaxNG validator for schematron schemas
-schematron_schema_valid = _etree.RelaxNG(
-    file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
-+schematron_schema_valid_supported = False
-+try:
-+    schematron_schema_valid = _etree.RelaxNG(
-+        file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
-+    schematron_schema_valid_supported = True
-+except _etree.RelaxNGParseError:
-+    # Some distributions delete the file due to licensing issues.
-+    def schematron_schema_valid(arg):
-+        raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng")
- 
- 
- def stylesheet_params(**kwargs):
-@@ -153,6 +159,13 @@ class Schematron(_etree._Validator):
-     report document gets stored and can be accessed as the ``validation_report``
-     property.
- 
-+    If ``validate_schema`` is set to False, the validation of the schema file
-+    itself is disabled.  Validation happens by default after building the full
-+    schema, unless the schema validation file cannot be found at import time,
-+    in which case the validation gets disabled.  Some lxml distributions exclude
-+    this file due to licensing issues.  ISO-Schematron validation can then still
-+    be used normally, but the schemas themselves cannot be validated.
-+
-     Here is a usage example::
- 
-       >>> from lxml import etree
-@@ -234,7 +247,8 @@ class Schematron(_etree._Validator):
-     def __init__(self, etree=None, file=None, include=True, expand=True,
-                  include_params={}, expand_params={}, compile_params={},
-                  store_schematron=False, store_xslt=False, store_report=False,
-                 phase=None, error_finder=ASSERTS_ONLY):
-+                 phase=None, error_finder=ASSERTS_ONLY,
-+                 validate_schema=schematron_schema_valid_supported):
-         super(Schematron, self).__init__()
- 
-         self._store_report = store_report
-@@ -273,7 +287,7 @@ class Schematron(_etree._Validator):
-             schematron = self._include(schematron, **include_params)
-         if expand:
-             schematron = self._expand(schematron, **expand_params)
-        if not schematron_schema_valid(schematron):
-+        if validate_schema and not schematron_schema_valid(schematron):
-             raise _etree.SchematronParseError(
-                 "invalid schematron schema: %s" %
-                 schematron_schema_valid.error_log)
-- 
-2.40.1
-
--- a/Skip-failing-test_iterparse_utf16_bom.patch
+++ b/Skip-failing-test_iterparse_utf16_bom.patch
@ -8,24 +8,24 @@ Subject: [PATCH] Skip failing test_iterparse_utf16_bom
 1 file changed, 2 insertions(+)

 diff --git a/src/lxml/tests/test_io.py b/src/lxml/tests/test_io.py
-index cbdbcef..6349b90 100644
+index 8fac41d..2b5d0de 100644
 --- a/src/lxml/tests/test_io.py
 +++ b/src/lxml/tests/test_io.py
-@@ -7,6 +7,7 @@ IO test cases that apply to both etree and ElementTree
- from __future__ import absolute_import
+@@ -4,6 +4,7 @@ IO test cases that apply to both etree and ElementTree
+ 
 
 import unittest
 +from unittest import skip
 import tempfile, gzip, os, os.path, gc, shutil
 
 from .common_imports import (
-@@ -304,6 +305,7 @@ class _IOTestCaseBase(HelperTestCase):
+@@ -305,6 +306,7 @@ class _IOTestCaseBase(HelperTestCase):
             os.unlink(f.name)
         self.assertEqual(utext, root.text)
 
 +    @skip
     def test_iterparse_utf16_bom(self):
-         utext = _str('Søk på nettet')
+         utext = 'Søk på nettet'
         uxml = '<?xml version="1.0" encoding="UTF-16"?><p>%s</p>' % utext
 -- 
 2.33.0
--- a/backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch
+++ b/backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch
@ -1,226 +0,0 @@
-From 72f5a287a4016ecb405f2e8a4a03ae22a5b0b496 Mon Sep 17 00:00:00 2001
-From: Stefan Behnel <stefan_ml@behnel.de>
-Date: Wed, 5 Jul 2023 22:10:45 +0200
-Subject: [PATCH] Change HTML "prefix" handling in ElementPath to let
- "element.find('part1:part2')" search for "part1:part2" instead of just
- "part2" with an unknown prefix. Also adapt the HTML "prefix" parsing test to
- make it work in libxml2 2.10.4 and later, where HTML "prefixes" are kept as
- part of the tag name by the parser.
-
---
- src/lxml/_elementpath.py     | 22 +++++++++++-----------
- src/lxml/apihelpers.pxi      |  7 +++++++
- src/lxml/etree.pyx           |  8 ++++----
- src/lxml/includes/tree.pxd   | 12 ++++++++++++
- src/lxml/tests/test_etree.py | 26 ++++++++++++++++++++++----
- 5 files changed, 56 insertions(+), 19 deletions(-)
-
-diff --git a/src/lxml/_elementpath.py b/src/lxml/_elementpath.py
-index eabd81c..001b345 100644
--- a/src/lxml/_elementpath.py
-+++ b/src/lxml/_elementpath.py
-@@ -71,14 +71,14 @@ xpath_tokenizer_re = re.compile(
-     r"\s+"
-     )
- 
-def xpath_tokenizer(pattern, namespaces=None):
-+def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True):
-     # ElementTree uses '', lxml used None originally.
-     default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None
-     parsing_attribute = False
-     for token in xpath_tokenizer_re.findall(pattern):
-         ttype, tag = token
-         if tag and tag[0] != "{":
-            if ":" in tag:
-+            if ":" in tag and with_prefixes:
-                 prefix, uri = tag.split(":", 1)
-                 try:
-                     if not namespaces:
-@@ -251,7 +251,7 @@ ops = {
- _cache = {}
- 
- 
-def _build_path_iterator(path, namespaces):
-+def _build_path_iterator(path, namespaces, with_prefixes=True):
-     """compile selector pattern"""
-     if path[-1:] == "/":
-         path += "*"  # implicit all (FIXME: keep this?)
-@@ -279,7 +279,7 @@ def _build_path_iterator(path, namespaces):
- 
-     if path[:1] == "/":
-         raise SyntaxError("cannot use absolute path on element")
-    stream = iter(xpath_tokenizer(path, namespaces))
-+    stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes))
-     try:
-         _next = stream.next
-     except AttributeError:
-@@ -308,8 +308,8 @@ def _build_path_iterator(path, namespaces):
- ##
- # Iterate over the matching nodes
- 
-def iterfind(elem, path, namespaces=None):
-    selector = _build_path_iterator(path, namespaces)
-+def iterfind(elem, path, namespaces=None, with_prefixes=True):
-+    selector = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes)
-     result = iter((elem,))
-     for select in selector:
-         result = select(result)
-@@ -319,8 +319,8 @@ def iterfind(elem, path, namespaces=None):
- ##
- # Find first matching object.
- 
-def find(elem, path, namespaces=None):
-    it = iterfind(elem, path, namespaces)
-+def find(elem, path, namespaces=None, with_prefixes=True):
-+    it = iterfind(elem, path, namespaces, with_prefixes=with_prefixes)
-     try:
-         return next(it)
-     except StopIteration:
-@@ -330,15 +330,15 @@ def find(elem, path, namespaces=None):
- ##
- # Find all matching objects.
- 
-def findall(elem, path, namespaces=None):
-+def findall(elem, path, namespaces=None, with_prefixes=True):
-     return list(iterfind(elem, path, namespaces))
- 
- 
- ##
- # Find text for first matching object.
- 
-def findtext(elem, path, default=None, namespaces=None):
-    el = find(elem, path, namespaces)
-+def findtext(elem, path, default=None, namespaces=None, with_prefixes=True):
-+    el = find(elem, path, namespaces, with_prefixes=with_prefixes)
-     if el is None:
-         return default
-     else:
-diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
-index 9fae9fb..35b3187 100644
--- a/src/lxml/apihelpers.pxi
-+++ b/src/lxml/apihelpers.pxi
-@@ -15,6 +15,13 @@ cdef void displayNode(xmlNode* c_node, indent):
-     finally:
-         return  # swallow any exceptions
- 
-+cdef inline bint _isHtmlDocument(_Element element) except -1:
-+    cdef xmlNode* c_node = element._c_node
-+    return (
-+        c_node is not NULL and c_node.doc is not NULL and
-+        c_node.doc.properties & tree.XML_DOC_HTML != 0
-+    )
-+
- cdef inline int _assertValidNode(_Element element) except -1:
-     assert element._c_node is not NULL, u"invalid Element proxy at %s" % id(element)
- 
-diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx
-index c0d236b..9acea68 100644
--- a/src/lxml/etree.pyx
-+++ b/src/lxml/etree.pyx
-@@ -1547,7 +1547,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
-         """
-         if isinstance(path, QName):
-             path = (<QName>path).text
-        return _elementpath.find(self, path, namespaces)
-+        return _elementpath.find(self, path, namespaces, with_prefixes=not _isHtmlDocument(self))
- 
-     def findtext(self, path, default=None, namespaces=None):
-         u"""findtext(self, path, default=None, namespaces=None)
-@@ -1560,7 +1560,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
-         """
-         if isinstance(path, QName):
-             path = (<QName>path).text
-        return _elementpath.findtext(self, path, default, namespaces)
-+        return _elementpath.findtext(self, path, default, namespaces, with_prefixes=not _isHtmlDocument(self))
- 
-     def findall(self, path, namespaces=None):
-         u"""findall(self, path, namespaces=None)
-@@ -1573,7 +1573,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
-         """
-         if isinstance(path, QName):
-             path = (<QName>path).text
-        return _elementpath.findall(self, path, namespaces)
-+        return _elementpath.findall(self, path, namespaces, with_prefixes=not _isHtmlDocument(self))
- 
-     def iterfind(self, path, namespaces=None):
-         u"""iterfind(self, path, namespaces=None)
-@@ -1586,7 +1586,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
-         """
-         if isinstance(path, QName):
-             path = (<QName>path).text
-        return _elementpath.iterfind(self, path, namespaces)
-+        return _elementpath.iterfind(self, path, namespaces, with_prefixes=not _isHtmlDocument(self))
- 
-     def xpath(self, _path, *, namespaces=None, extensions=None,
-               smart_strings=True, **_variables):
-diff --git a/src/lxml/includes/tree.pxd b/src/lxml/includes/tree.pxd
-index 010af80..d709313 100644
--- a/src/lxml/includes/tree.pxd
-+++ b/src/lxml/includes/tree.pxd
-@@ -154,6 +154,17 @@ cdef extern from "libxml/tree.h":
-         XML_EXTERNAL_PARAMETER_ENTITY=        5
-         XML_INTERNAL_PREDEFINED_ENTITY=       6
- 
-+    ctypedef enum xmlDocProperties:
-+        XML_DOC_WELLFORMED          = 1    # /* document is XML well formed */
-+        XML_DOC_NSVALID             = 2    # /* document is Namespace valid */
-+        XML_DOC_OLD10               = 4    # /* parsed with old XML-1.0 parser */
-+        XML_DOC_DTDVALID            = 8    # /* DTD validation was successful */
-+        XML_DOC_XINCLUDE            = 16   # /* XInclude substitution was done */
-+        XML_DOC_USERBUILT           = 32   # /* Document was built using the API
-+                                           #    and not by parsing an instance */
-+        XML_DOC_INTERNAL            = 64   # /* built for internal processing */
-+        XML_DOC_HTML                = 128  # /* parsed or built HTML document */
-+
-     ctypedef struct xmlNs:
-         const_xmlChar* href
-         const_xmlChar* prefix
-@@ -274,6 +285,7 @@ cdef extern from "libxml/tree.h":
-         void* _private
-         xmlDtd* intSubset
-         xmlDtd* extSubset
-+        int properties
-         
-     ctypedef struct xmlAttr:
-         void* _private
-diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
-index 0339796..80a12a4 100644
--- a/src/lxml/tests/test_etree.py
-+++ b/src/lxml/tests/test_etree.py
-@@ -3069,11 +3069,29 @@ class ETreeOnlyTestCase(HelperTestCase):
- 
-     def test_html_prefix_nsmap(self):
-         etree = self.etree
-        el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
-        if etree.LIBXML_VERSION < (2, 9, 11):
-            self.assertEqual({'hha': None}, el.nsmap)
-+        el = etree.HTML('<hha:page-description>aa</hha:page-description>')
-+        pd = el[-1]
-+        while len(pd):
-+            pd = pd[-1]
-+
-+        if etree.LIBXML_VERSION >= (2, 10, 4):
-+            # "Prefix" is kept as part of the tag name.
-+            self.assertEqual("hha:page-description", pd.tag)
-+            self.assertIsNone(el.find('.//page-description'))
-+            self.assertIsNotNone(el.find('.//hha:page-description'))  # no namespaces!
-+            for e in el.iter():
-+                self.assertEqual({}, e.nsmap)
-+        elif etree.LIBXML_VERSION >= (2, 9, 11):
-+            # "Prefix" is stripped.
-+            self.assertEqual("page-description", pd.tag)
-+            self.assertIsNotNone(el.find('.//page-description'))
-+            for e in el.iter():
-+                self.assertEqual({}, e.nsmap)
-         else:
-            self.assertEqual({}, el.nsmap)
-+            # "Prefix" is parsed as XML prefix.
-+            self.assertEqual("page-description", pd.tag)
-+            pd = el.find('.//page-description')
-+            self.assertEqual({'hha': None}, pd.nsmap)
- 
-     def test_getchildren(self):
-         Element = self.etree.Element
-- 
-2.33.0
-
--- a/lxml-4.9.3.tar.gz
+++ b/lxml-4.9.3.tar.gz
--- a/lxml-5.1.0.tar.gz
+++ b/lxml-5.1.0.tar.gz
--- a/python-lxml.spec
+++ b/python-lxml.spec
@ -6,18 +6,14 @@ the simplicity of a native Python API, mostly compatible but superior to the wel
 The latest release works with all CPython versions from 2.7 to 3.7.

 Name:		python-lxml
-Version:	4.9.3
-Release:	2
+Version:	5.1.0
+Release:	1
 Summary:	XML processing library combining libxml2/libxslt with the ElementTree API
 License:	BSD
 URL:		https://github.com/lxml/lxml
-Source0:	https://files.pythonhosted.org/packages/30/39/7305428d1c4f28282a4f5bdbef24e0f905d351f34cf351ceb131f5cddf78/lxml-4.9.3.tar.gz
+Source0:	https://files.pythonhosted.org/packages/2b/b4/bbccb250adbee490553b6a52712c46c20ea1ba533a643f1424b27ffc6845/lxml-5.1.0.tar.gz

-Patch0: Make-the-validation-of-ISO-Schematron-files-optional.patch
-Patch1: 380.patch
-Patch2: Skip-failing-test_iterparse_utf16_bom.patch
-
-Patch6000: backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch
+Patch0: Skip-failing-test_iterparse_utf16_bom.patch

 %description
 %{_description}
@ -71,7 +67,7 @@ mv %{buildroot}/filelist.lst .
 mv %{buildroot}/doclist.lst .

 %check
-make test3
+make test

 %files -n python3-lxml -f filelist.lst
 %license doc/licenses/*.txt LICENSES.txt
@ -81,6 +77,17 @@ make test3
 %doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt

 %changelog
+* Wed Feb 07 2024 dongyuzhen <dongyuzhen@h-partners.com> - 5.1.0-1
+- upgrade version to 5.1.0:
+  - some incorrect declarations were removed from ``python.pxd``
+  - built with Cython 3.0.7
+  - some redundant and long deprecated methods were removed
+  - character escaping in ``C14N2`` serialisation now uses a single pass over the text instead of searching for each unescaped character separately
+  - early support for Python 3.13a2 was added
+  - support for Python 2.7 and Python versions < 3.6 was removed
+  - parsing ASCII strings is slightly faster
+  - some bug fixes
+
 * Wed Aug 09 2023 zhuofeng <zhuofeng2@huawei.com> - 4.9.3-2
 - sync fedora patch