diff --git a/backport-converts-the-code-base-to-Python-3-and-removes-the-u.patch b/backport-converts-the-code-base-to-Python-3-and-removes-the-u.patch deleted file mode 100644 index 1cab095..0000000 --- a/backport-converts-the-code-base-to-Python-3-and-removes-the-u.patch +++ /dev/null @@ -1,2062 +0,0 @@ -diff --git a/README.md b/README.md -index 92dd339..884f9eb 100644 ---- a/README.md -+++ b/README.md -@@ -53,17 +53,11 @@ To go beyond the basics, [comprehensive documentation is available](http://www.c - - # Note on Python 2 sunsetting - --Since 2012, Beautiful Soup has been developed as a Python 2 library --which is automatically converted to Python 3 code as necessary. This --makes it impossible to take advantage of some features of Python --3. -- --For this reason, I plan to discontinue Beautiful Soup's Python 2 --support at some point after December 31, 2020: one year after the --sunset date for Python 2 itself. Beyond that point, new Beautiful Soup --development will exclusively target Python 3. Of course, older --releases of Beautiful Soup, which support both versions, will continue --to be available. -+Beautiful Soup's support for Python 2 was discontinued on December 31, -+2020: one year after the sunset date for Python 2 itself. From this -+point onward, new Beautiful Soup development will exclusively target -+Python 3. The final release of Beautiful Soup 4 to support Python 2 -+was 4.9.3. - - # Supporting the project - -@@ -93,10 +87,5 @@ $ nosetests - ``` - - ``` --$ python -m unittest discover -s bs4 -+$ python3 -m unittest discover -s bs4 - ``` -- --If you checked out the source tree, you should see a script in the --home directory called test-all-versions. This script will run the unit --tests under Python 2, then create a temporary Python 3 conversion of --the source and run the unit tests again under Python 3. -diff --git a/bs4/__init__.py b/bs4/__init__.py -index 8f78809..51ccc21 100644 ---- a/bs4/__init__.py -+++ b/bs4/__init__.py -@@ -7,7 +7,7 @@ Beautiful Soup uses a pluggable XML or HTML parser to parse a - provides methods and Pythonic idioms that make it easy to navigate, - search, and modify the parse tree. - --Beautiful Soup works with Python 2.7 and up. It works better if lxml -+Beautiful Soup works with Python 3.5 and up. It works better if lxml - and/or html5lib is installed. - - For more than you ever wanted to know about Beautiful Soup, see the -@@ -29,6 +29,11 @@ import sys - import traceback - import warnings - -+# The very first thing we do is give a useful error if someone is -+# running this code under Python 2. -+if sys.version_info.major < 3: -+ raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.') -+ - from .builder import builder_registry, ParserRejectedMarkup - from .dammit import UnicodeDammit - from .element import ( -@@ -49,10 +54,6 @@ from .element import ( - TemplateString, - ) - --# The very first thing we do is give a useful error if someone is --# running this code under Python 3 without converting it. --'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' -- - # Define some custom warnings. - class GuessedAtParserWarning(UserWarning): - """The warning issued when BeautifulSoup has to guess what parser to -@@ -100,7 +101,7 @@ class BeautifulSoup(Tag): - # Since BeautifulSoup subclasses Tag, it's possible to treat it as - # a Tag with a .name. This name makes it clear the BeautifulSoup - # object isn't a real markup tag. -- ROOT_TAG_NAME = u'[document]' -+ ROOT_TAG_NAME = '[document]' - - # If the end-user gives no indication which tree builder they - # want, look for one with these features. -@@ -217,7 +218,7 @@ class BeautifulSoup(Tag): - from_encoding = from_encoding or deprecated_argument( - "fromEncoding", "from_encoding") - -- if from_encoding and isinstance(markup, unicode): -+ if from_encoding and isinstance(markup, str): - warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") - from_encoding = None - -@@ -234,7 +235,7 @@ class BeautifulSoup(Tag): - builder_class = builder - builder = None - elif builder is None: -- if isinstance(features, basestring): -+ if isinstance(features, str): - features = [features] - if features is None or len(features) == 0: - features = self.DEFAULT_BUILDER_FEATURES -@@ -309,13 +310,13 @@ class BeautifulSoup(Tag): - markup = markup.read() - elif len(markup) <= 256 and ( - (isinstance(markup, bytes) and not b'<' in markup) -- or (isinstance(markup, unicode) and not u'<' in markup) -+ or (isinstance(markup, str) and not '<' in markup) - ): - # Print out warnings for a couple beginner problems - # involving passing non-markup to Beautiful Soup. - # Beautiful Soup will still parse the input as markup, - # just in case that's what the user really wants. -- if (isinstance(markup, unicode) -+ if (isinstance(markup, str) - and not os.path.supports_unicode_filenames): - possible_filename = markup.encode("utf8") - else: -@@ -323,7 +324,7 @@ class BeautifulSoup(Tag): - is_file = False - try: - is_file = os.path.exists(possible_filename) -- except Exception, e: -+ except Exception as e: - # This is almost certainly a problem involving - # characters not valid in filenames on this - # system. Just let it go. -@@ -353,9 +354,9 @@ class BeautifulSoup(Tag): - pass - - if not success: -- other_exceptions = [unicode(e) for e in rejections] -+ other_exceptions = [str(e) for e in rejections] - raise ParserRejectedMarkup( -- u"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions) -+ "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions) - ) - - # Clear out the markup and remove the builder's circular -@@ -406,9 +407,9 @@ class BeautifulSoup(Tag): - if isinstance(markup, bytes): - space = b' ' - cant_start_with = (b"http:", b"https:") -- elif isinstance(markup, unicode): -- space = u' ' -- cant_start_with = (u"http:", u"https:") -+ elif isinstance(markup, str): -+ space = ' ' -+ cant_start_with = ("http:", "https:") - else: - return - -@@ -545,7 +546,7 @@ class BeautifulSoup(Tag): - containerClass = self.string_container(containerClass) - - if self.current_data: -- current_data = u''.join(self.current_data) -+ current_data = ''.join(self.current_data) - # If whitespace is not preserved, and this string contains - # nothing but ASCII spaces, replace it with a single space - # or newline. -@@ -748,9 +749,9 @@ class BeautifulSoup(Tag): - eventual_encoding = None - if eventual_encoding != None: - encoding_part = ' encoding="%s"' % eventual_encoding -- prefix = u'\n' % encoding_part -+ prefix = '\n' % encoding_part - else: -- prefix = u'' -+ prefix = '' - if not pretty_print: - indent_level = None - else: -@@ -788,4 +789,4 @@ class FeatureNotFound(ValueError): - if __name__ == '__main__': - import sys - soup = BeautifulSoup(sys.stdin) -- print(soup.prettify()) -+ print((soup.prettify())) -diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py -index 03da4c6..03fbd6a 100644 ---- a/bs4/builder/__init__.py -+++ b/bs4/builder/__init__.py -@@ -300,13 +300,13 @@ class TreeBuilder(object): - universal = self.cdata_list_attributes.get('*', []) - tag_specific = self.cdata_list_attributes.get( - tag_name.lower(), None) -- for attr in attrs.keys(): -+ for attr in list(attrs.keys()): - if attr in universal or (tag_specific and attr in tag_specific): - # We have a "class"-type attribute whose string - # value is a whitespace-separated list of - # values. Split it into a list. - value = attrs[attr] -- if isinstance(value, basestring): -+ if isinstance(value, str): - values = nonwhitespace_re.findall(value) - else: - # html5lib sometimes calls setAttributes twice -@@ -496,7 +496,7 @@ class ParserRejectedMarkup(Exception): - """ - if isinstance(message_or_exception, Exception): - e = message_or_exception -- message_or_exception = "%s: %s" % (e.__class__.__name__, unicode(e)) -+ message_or_exception = "%s: %s" % (e.__class__.__name__, str(e)) - super(ParserRejectedMarkup, self).__init__(message_or_exception) - - # Builders are registered in reverse order of priority, so that custom -diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py -index a1c6134..69aefd7 100644 ---- a/bs4/builder/_html5lib.py -+++ b/bs4/builder/_html5lib.py -@@ -33,7 +33,7 @@ try: - # Pre-0.99999999 - from html5lib.treebuilders import _base as treebuilder_base - new_html5lib = False --except ImportError, e: -+except ImportError as e: - # 0.99999999 and up - from html5lib.treebuilders import base as treebuilder_base - new_html5lib = True -@@ -79,7 +79,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): - parser = html5lib.HTMLParser(tree=self.create_treebuilder) - self.underlying_builder.parser = parser - extra_kwargs = dict() -- if not isinstance(markup, unicode): -+ if not isinstance(markup, str): - if new_html5lib: - extra_kwargs['override_encoding'] = self.user_specified_encoding - else: -@@ -87,13 +87,13 @@ class HTML5TreeBuilder(HTMLTreeBuilder): - doc = parser.parse(markup, **extra_kwargs) - - # Set the character encoding detected by the tokenizer. -- if isinstance(markup, unicode): -+ if isinstance(markup, str): - # We need to special-case this because html5lib sets - # charEncoding to UTF-8 if it gets Unicode input. - doc.original_encoding = None - else: - original_encoding = parser.tokenizer.stream.charEncoding[0] -- if not isinstance(original_encoding, basestring): -+ if not isinstance(original_encoding, str): - # In 0.99999999 and up, the encoding is an html5lib - # Encoding object. We want to use a string for compatibility - # with other tree builders. -@@ -110,7 +110,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): - - def test_fragment_to_document(self, fragment): - """See `TreeBuilder`.""" -- return u'%s' % fragment -+ return '%s' % fragment - - - class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): -@@ -217,7 +217,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): - rv.append("|%s<%s>" % (' ' * indent, name)) - if element.attrs: - attributes = [] -- for name, value in element.attrs.items(): -+ for name, value in list(element.attrs.items()): - if isinstance(name, NamespacedAttribute): - name = "%s %s" % (prefixes[name.namespace], name.name) - if isinstance(value, list): -@@ -272,7 +272,7 @@ class Element(treebuilder_base.Node): - - def appendChild(self, node): - string_child = child = None -- if isinstance(node, basestring): -+ if isinstance(node, str): - # Some other piece of code decided to pass in a string - # instead of creating a TextElement object to contain the - # string. -@@ -289,7 +289,7 @@ class Element(treebuilder_base.Node): - child = node.element - node.parent = self - -- if not isinstance(child, basestring) and child.parent is not None: -+ if not isinstance(child, str) and child.parent is not None: - node.element.extract() - - if (string_child is not None and self.element.contents -@@ -302,7 +302,7 @@ class Element(treebuilder_base.Node): - old_element.replace_with(new_element) - self.soup._most_recent_element = new_element - else: -- if isinstance(node, basestring): -+ if isinstance(node, str): - # Create a brand new NavigableString from this string. - child = self.soup.new_string(node) - -@@ -340,7 +340,7 @@ class Element(treebuilder_base.Node): - - self.soup.builder._replace_cdata_list_attribute_values( - self.name, attributes) -- for name, value in attributes.items(): -+ for name, value in list(attributes.items()): - self.element[name] = value - - # The attributes may contain variables that need substitution. -diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py -index 96a7b7d..88860a9 100644 ---- a/bs4/builder/_htmlparser.py -+++ b/bs4/builder/_htmlparser.py -@@ -8,11 +8,11 @@ __all__ = [ - 'HTMLParserTreeBuilder', - ] - --from HTMLParser import HTMLParser -+from html.parser import HTMLParser - - try: -- from HTMLParser import HTMLParseError --except ImportError, e: -+ from html.parser import HTMLParseError -+except ImportError as e: - # HTMLParseError is removed in Python 3.5. Since it can never be - # thrown in 3.5, we can just define our own class as a placeholder. - class HTMLParseError(Exception): -@@ -219,14 +219,14 @@ class BeautifulSoupHTMLParser(HTMLParser): - continue - try: - data = bytearray([real_name]).decode(encoding) -- except UnicodeDecodeError, e: -+ except UnicodeDecodeError as e: - pass - if not data: - try: -- data = unichr(real_name) -- except (ValueError, OverflowError), e: -+ data = chr(real_name) -+ except (ValueError, OverflowError) as e: - pass -- data = data or u"\N{REPLACEMENT CHARACTER}" -+ data = data or "\N{REPLACEMENT CHARACTER}" - self.handle_data(data) - - def handle_entityref(self, name): -@@ -353,7 +353,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): - document to Unicode and parsing it. Each strategy will be tried - in turn. - """ -- if isinstance(markup, unicode): -+ if isinstance(markup, str): - # Parse Unicode as-is. - yield (markup, None, None, False) - return -@@ -376,7 +376,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): - try: - parser.feed(markup) - parser.close() -- except HTMLParseError, e: -+ except HTMLParseError as e: - warnings.warn(RuntimeWarning( - "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) - raise e -diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py -index 1b44d75..432a2c8 100644 ---- a/bs4/builder/_lxml.py -+++ b/bs4/builder/_lxml.py -@@ -8,11 +8,11 @@ __all__ = [ - - try: - from collections.abc import Callable # Python 3.6 --except ImportError , e: -+except ImportError as e: - from collections import Callable - - from io import BytesIO --from StringIO import StringIO -+from io import StringIO - from lxml import etree - from bs4.element import ( - Comment, -@@ -35,7 +35,7 @@ LXML = 'lxml' - - def _invert(d): - "Invert a dictionary." -- return dict((v,k) for k, v in d.items()) -+ return dict((v,k) for k, v in list(d.items())) - - class LXMLTreeBuilderForXML(TreeBuilder): - DEFAULT_PARSER_CLASS = etree.XMLParser -@@ -81,7 +81,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): - - :param mapping: A dictionary mapping namespace prefixes to URIs. - """ -- for key, value in mapping.items(): -+ for key, value in list(mapping.items()): - if key and key not in self.soup._namespaces: - # Let the BeautifulSoup object know about a new namespace. - # If there are multiple namespaces defined with the same -@@ -169,12 +169,12 @@ class LXMLTreeBuilderForXML(TreeBuilder): - else: - self.processing_instruction_class = XMLProcessingInstruction - -- if isinstance(markup, unicode): -+ if isinstance(markup, str): - # We were given Unicode. Maybe lxml can parse Unicode on - # this system? - yield markup, None, document_declared_encoding, False - -- if isinstance(markup, unicode): -+ if isinstance(markup, str): - # No, apparently not. Convert the Unicode to UTF-8 and - # tell lxml to parse it as UTF-8. - yield (markup.encode("utf8"), "utf8", -@@ -189,7 +189,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): - def feed(self, markup): - if isinstance(markup, bytes): - markup = BytesIO(markup) -- elif isinstance(markup, unicode): -+ elif isinstance(markup, str): - markup = StringIO(markup) - - # Call feed() at least once, even if the markup is empty, -@@ -204,7 +204,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): - if len(data) != 0: - self.parser.feed(data) - self.parser.close() -- except (UnicodeDecodeError, LookupError, etree.ParserError), e: -+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e: - raise ParserRejectedMarkup(e) - - def close(self): -@@ -233,7 +233,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): - # Also treat the namespace mapping as a set of attributes on the - # tag, so we can recreate it later. - attrs = attrs.copy() -- for prefix, namespace in nsmap.items(): -+ for prefix, namespace in list(nsmap.items()): - attribute = NamespacedAttribute( - "xmlns", prefix, "http://www.w3.org/2000/xmlns/") - attrs[attribute] = namespace -@@ -242,7 +242,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): - # from lxml with namespaces attached to their names, and - # turn then into NamespacedAttribute objects. - new_attrs = {} -- for attr, value in attrs.items(): -+ for attr, value in list(attrs.items()): - namespace, attr = self._getNsTag(attr) - if namespace is None: - new_attrs[attr] = value -@@ -302,7 +302,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): - - def test_fragment_to_document(self, fragment): - """See `TreeBuilder`.""" -- return u'\n%s' % fragment -+ return '\n%s' % fragment - - - class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): -@@ -323,10 +323,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): - self.parser = self.parser_for(encoding) - self.parser.feed(markup) - self.parser.close() -- except (UnicodeDecodeError, LookupError, etree.ParserError), e: -+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e: - raise ParserRejectedMarkup(e) - - - def test_fragment_to_document(self, fragment): - """See `TreeBuilder`.""" -- return u'%s' % fragment -+ return '%s' % fragment -diff --git a/bs4/dammit.py b/bs4/dammit.py -index 33f7b7d..ee3708f 100644 ---- a/bs4/dammit.py -+++ b/bs4/dammit.py -@@ -10,7 +10,7 @@ XML or HTML to reflect a new encoding; that's the tree builder's job. - __license__ = "MIT" - - import codecs --from htmlentitydefs import codepoint2name -+from html.entities import codepoint2name - import re - import logging - import string -@@ -22,7 +22,7 @@ try: - # PyPI package: cchardet - import cchardet - def chardet_dammit(s): -- if isinstance(s, unicode): -+ if isinstance(s, str): - return None - return cchardet.detect(s)['encoding'] - except ImportError: -@@ -32,7 +32,7 @@ except ImportError: - # PyPI package: chardet - import chardet - def chardet_dammit(s): -- if isinstance(s, unicode): -+ if isinstance(s, str): - return None - return chardet.detect(s)['encoding'] - #import chardet.constants -@@ -53,14 +53,14 @@ except ImportError: - - # Build bytestring and Unicode versions of regular expressions for finding - # a declared encoding inside an XML or HTML document. --xml_encoding = u'^\\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>' --html_meta = u'<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]' -+xml_encoding = '^\\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>' -+html_meta = '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]' - encoding_res = dict() - encoding_res[bytes] = { - 'html' : re.compile(html_meta.encode("ascii"), re.I), - 'xml' : re.compile(xml_encoding.encode("ascii"), re.I), - } --encoding_res[unicode] = { -+encoding_res[str] = { - 'html' : re.compile(html_meta, re.I), - 'xml' : re.compile(xml_encoding, re.I) - } -@@ -80,7 +80,7 @@ class EntitySubstitution(object): - # entities, but that's a little tricky. - extra = [(39, 'apos')] - for codepoint, name in list(codepoint2name.items()) + extra: -- character = unichr(codepoint) -+ character = chr(codepoint) - if codepoint not in (34, 39): - # There's no point in turning the quotation mark into - # " or the single quote into ', unless it -@@ -323,7 +323,7 @@ class EncodingDetector: - :return: A 2-tuple (modified data, implied encoding) - """ - encoding = None -- if isinstance(data, unicode): -+ if isinstance(data, str): - # Unicode data cannot have a byte-order mark. - return data, encoding - if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ -@@ -370,7 +370,7 @@ class EncodingDetector: - if isinstance(markup, bytes): - res = encoding_res[bytes] - else: -- res = encoding_res[unicode] -+ res = encoding_res[str] - - xml_re = res['xml'] - html_re = res['html'] -@@ -431,9 +431,9 @@ class UnicodeDammit: - markup, override_encodings, is_html, exclude_encodings) - - # Short-circuit if the data is in Unicode to begin with. -- if isinstance(markup, unicode) or markup == '': -+ if isinstance(markup, str) or markup == '': - self.markup = markup -- self.unicode_markup = unicode(markup) -+ self.unicode_markup = str(markup) - self.original_encoding = None - return - -@@ -523,7 +523,7 @@ class UnicodeDammit: - - :param encoding: The name of an encoding. - """ -- return unicode(data, encoding, errors) -+ return str(data, encoding, errors) - - @property - def declared_html_encoding(self): -diff --git a/bs4/diagnose.py b/bs4/diagnose.py -index e4f2f47..500e92d 100644 ---- a/bs4/diagnose.py -+++ b/bs4/diagnose.py -@@ -4,8 +4,8 @@ - __license__ = "MIT" - - import cProfile --from StringIO import StringIO --from HTMLParser import HTMLParser -+from io import StringIO -+from html.parser import HTMLParser - import bs4 - from bs4 import BeautifulSoup, __version__ - from bs4.builder import builder_registry -@@ -25,8 +25,8 @@ def diagnose(data): - :param data: A string containing markup that needs to be explained. - :return: None; diagnostics are printed to standard output. - """ -- print("Diagnostic running on Beautiful Soup %s" % __version__) -- print("Python version %s" % sys.version) -+ print(("Diagnostic running on Beautiful Soup %s" % __version__)) -+ print(("Python version %s" % sys.version)) - - basic_parsers = ["html.parser", "html5lib", "lxml"] - for name in basic_parsers: -@@ -35,16 +35,16 @@ def diagnose(data): - break - else: - basic_parsers.remove(name) -- print( -+ print(( - "I noticed that %s is not installed. Installing it may help." % -- name) -+ name)) - - if 'lxml' in basic_parsers: - basic_parsers.append("lxml-xml") - try: - from lxml import etree -- print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))) -- except ImportError, e: -+ print(("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))) -+ except ImportError as e: - print( - "lxml is not installed or couldn't be imported.") - -@@ -52,21 +52,21 @@ def diagnose(data): - if 'html5lib' in basic_parsers: - try: - import html5lib -- print("Found html5lib version %s" % html5lib.__version__) -- except ImportError, e: -+ print(("Found html5lib version %s" % html5lib.__version__)) -+ except ImportError as e: - print( - "html5lib is not installed or couldn't be imported.") - - if hasattr(data, 'read'): - data = data.read() - elif data.startswith("http:") or data.startswith("https:"): -- print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data) -+ print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)) - print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.") - return - else: - try: - if os.path.exists(data): -- print('"%s" looks like a filename. Reading data from the file.' % data) -+ print(('"%s" looks like a filename. Reading data from the file.' % data)) - with open(data) as fp: - data = fp.read() - except ValueError: -@@ -76,19 +76,19 @@ def diagnose(data): - print("") - - for parser in basic_parsers: -- print("Trying to parse your markup with %s" % parser) -+ print(("Trying to parse your markup with %s" % parser)) - success = False - try: - soup = BeautifulSoup(data, features=parser) - success = True -- except Exception, e: -- print("%s could not parse the markup." % parser) -+ except Exception as e: -+ print(("%s could not parse the markup." % parser)) - traceback.print_exc() - if success: -- print("Here's what %s did with the markup:" % parser) -- print(soup.prettify()) -+ print(("Here's what %s did with the markup:" % parser)) -+ print((soup.prettify())) - -- print("-" * 80) -+ print(("-" * 80)) - - def lxml_trace(data, html=True, **kwargs): - """Print out the lxml events that occur during parsing. -@@ -104,7 +104,7 @@ def lxml_trace(data, html=True, **kwargs): - """ - from lxml import etree - for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): -- print("%s, %4s, %s" % (event, element.tag, element.text)) -+ print(("%s, %4s, %s" % (event, element.tag, element.text))) - - class AnnouncingParser(HTMLParser): - """Subclass of HTMLParser that announces parse events, without doing -@@ -193,9 +193,9 @@ def rdoc(num_elements=1000): - - def benchmark_parsers(num_elements=100000): - """Very basic head-to-head performance benchmark.""" -- print("Comparative parser benchmark on Beautiful Soup %s" % __version__) -+ print(("Comparative parser benchmark on Beautiful Soup %s" % __version__)) - data = rdoc(num_elements) -- print("Generated a large invalid HTML document (%d bytes)." % len(data)) -+ print(("Generated a large invalid HTML document (%d bytes)." % len(data))) - - for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: - success = False -@@ -204,24 +204,24 @@ def benchmark_parsers(num_elements=100000): - soup = BeautifulSoup(data, parser) - b = time.time() - success = True -- except Exception, e: -- print("%s could not parse the markup." % parser) -+ except Exception as e: -+ print(("%s could not parse the markup." % parser)) - traceback.print_exc() - if success: -- print("BS4+%s parsed the markup in %.2fs." % (parser, b-a)) -+ print(("BS4+%s parsed the markup in %.2fs." % (parser, b-a))) - - from lxml import etree - a = time.time() - etree.HTML(data) - b = time.time() -- print("Raw lxml parsed the markup in %.2fs." % (b-a)) -+ print(("Raw lxml parsed the markup in %.2fs." % (b-a))) - - import html5lib - parser = html5lib.HTMLParser() - a = time.time() - parser.parse(data) - b = time.time() -- print("Raw html5lib parsed the markup in %.2fs." % (b-a)) -+ print(("Raw html5lib parsed the markup in %.2fs." % (b-a))) - - def profile(num_elements=100000, parser="lxml"): - """Use Python's profiler on a randomly generated document.""" -diff --git a/bs4/element.py b/bs4/element.py -index 09a81d9..81d9db9 100644 ---- a/bs4/element.py -+++ b/bs4/element.py -@@ -3,14 +3,14 @@ __license__ = "MIT" - - try: - from collections.abc import Callable # Python 3.6 --except ImportError , e: -+except ImportError as e: - from collections import Callable - import re - import sys - import warnings - try: - import soupsieve --except ImportError, e: -+except ImportError as e: - soupsieve = None - warnings.warn( - 'The soupsieve package is not installed. CSS selectors cannot be used.' -@@ -57,22 +57,22 @@ def _alias(attr): - # Source: - # https://docs.python.org/3/library/codecs.html#python-specific-encodings - PYTHON_SPECIFIC_ENCODINGS = set([ -- u"idna", -- u"mbcs", -- u"oem", -- u"palmos", -- u"punycode", -- u"raw_unicode_escape", -- u"undefined", -- u"unicode_escape", -- u"raw-unicode-escape", -- u"unicode-escape", -- u"string-escape", -- u"string_escape", -+ "idna", -+ "mbcs", -+ "oem", -+ "palmos", -+ "punycode", -+ "raw_unicode_escape", -+ "undefined", -+ "unicode_escape", -+ "raw-unicode-escape", -+ "unicode-escape", -+ "string-escape", -+ "string_escape", - ]) - - --class NamespacedAttribute(unicode): -+class NamespacedAttribute(str): - """A namespaced string (e.g. 'xml:lang') that remembers the namespace - ('xml') and the name ('lang') that were used to create it. - """ -@@ -84,18 +84,18 @@ class NamespacedAttribute(unicode): - name = None - - if name is None: -- obj = unicode.__new__(cls, prefix) -+ obj = str.__new__(cls, prefix) - elif prefix is None: - # Not really namespaced. -- obj = unicode.__new__(cls, name) -+ obj = str.__new__(cls, name) - else: -- obj = unicode.__new__(cls, prefix + ":" + name) -+ obj = str.__new__(cls, prefix + ":" + name) - obj.prefix = prefix - obj.name = name - obj.namespace = namespace - return obj - --class AttributeValueWithCharsetSubstitution(unicode): -+class AttributeValueWithCharsetSubstitution(str): - """A stand-in object for a character encoding specified in HTML.""" - - class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): -@@ -106,7 +106,7 @@ class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): - """ - - def __new__(cls, original_value): -- obj = unicode.__new__(cls, original_value) -+ obj = str.__new__(cls, original_value) - obj.original_value = original_value - return obj - -@@ -134,9 +134,9 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): - match = cls.CHARSET_RE.search(original_value) - if match is None: - # No substitution necessary. -- return unicode.__new__(unicode, original_value) -+ return str.__new__(str, original_value) - -- obj = unicode.__new__(cls, original_value) -+ obj = str.__new__(cls, original_value) - obj.original_value = original_value - return obj - -@@ -376,7 +376,7 @@ class PageElement(object): - raise ValueError("Cannot insert None into a tag.") - if new_child is self: - raise ValueError("Cannot insert a tag into itself.") -- if (isinstance(new_child, basestring) -+ if (isinstance(new_child, str) - and not isinstance(new_child, NavigableString)): - new_child = NavigableString(new_child) - -@@ -753,7 +753,7 @@ class PageElement(object): - result = (element for element in generator - if isinstance(element, Tag)) - return ResultSet(strainer, result) -- elif isinstance(name, basestring): -+ elif isinstance(name, str): - # Optimization to find all tags with a given name. - if name.count(':') == 1: - # This is a name with a prefix. If this is a namespace-aware document, -@@ -872,7 +872,7 @@ class PageElement(object): - return self.parents - - --class NavigableString(unicode, PageElement): -+class NavigableString(str, PageElement): - """A Python Unicode string that is part of a parse tree. - - When Beautiful Soup parses the markup penguin, it will -@@ -895,10 +895,10 @@ class NavigableString(unicode, PageElement): - passed in to the superclass's __new__ or the superclass won't know - how to handle non-ASCII characters. - """ -- if isinstance(value, unicode): -- u = unicode.__new__(cls, value) -+ if isinstance(value, str): -+ u = str.__new__(cls, value) - else: -- u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) -+ u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) - u.setup() - return u - -@@ -909,7 +909,7 @@ class NavigableString(unicode, PageElement): - return type(self)(self) - - def __getnewargs__(self): -- return (unicode(self),) -+ return (str(self),) - - def __getattr__(self, attr): - """text.string gives you text. This is for backwards -@@ -975,30 +975,30 @@ class PreformattedString(NavigableString): - - class CData(PreformattedString): - """A CDATA block.""" -- PREFIX = u'' -+ PREFIX = '' - - class ProcessingInstruction(PreformattedString): - """A SGML processing instruction.""" - -- PREFIX = u'' -+ PREFIX = '' - - class XMLProcessingInstruction(ProcessingInstruction): - """An XML processing instruction.""" -- PREFIX = u'' -+ PREFIX = '' - - class Comment(PreformattedString): - """An HTML or XML comment.""" -- PREFIX = u'' -+ PREFIX = '' - - - class Declaration(PreformattedString): - """An XML declaration.""" -- PREFIX = u'' -+ PREFIX = '' - - - class Doctype(PreformattedString): -@@ -1026,8 +1026,8 @@ class Doctype(PreformattedString): - - return Doctype(value) - -- PREFIX = u'\n' -+ PREFIX = '\n' - - - class Stylesheet(NavigableString): -@@ -1263,7 +1263,7 @@ class Tag(PageElement): - for string in self._all_strings(True): - yield string - -- def get_text(self, separator=u"", strip=False, -+ def get_text(self, separator="", strip=False, - types=(NavigableString, CData)): - """Get all child strings, concatenated using the given separator. - -@@ -1416,7 +1416,7 @@ class Tag(PageElement): - def __contains__(self, x): - return x in self.contents - -- def __nonzero__(self): -+ def __bool__(self): - "A tag is non-None even if it has no contents." - return True - -@@ -1565,8 +1565,8 @@ class Tag(PageElement): - else: - if isinstance(val, list) or isinstance(val, tuple): - val = ' '.join(val) -- elif not isinstance(val, basestring): -- val = unicode(val) -+ elif not isinstance(val, str): -+ val = str(val) - elif ( - isinstance(val, AttributeValueWithCharsetSubstitution) - and eventual_encoding is not None -@@ -1575,7 +1575,7 @@ class Tag(PageElement): - - text = formatter.attribute_value(val) - decoded = ( -- unicode(key) + '=' -+ str(key) + '=' - + formatter.quoted_attribute_value(text)) - attrs.append(decoded) - close = '' -@@ -1934,7 +1934,7 @@ class SoupStrainer(object): - else: - attrs = kwargs - normalized_attrs = {} -- for key, value in attrs.items(): -+ for key, value in list(attrs.items()): - normalized_attrs[key] = self._normalize_search_value(value) - - self.attrs = normalized_attrs -@@ -1943,7 +1943,7 @@ class SoupStrainer(object): - def _normalize_search_value(self, value): - # Leave it alone if it's a Unicode string, a callable, a - # regular expression, a boolean, or None. -- if (isinstance(value, unicode) or isinstance(value, Callable) or hasattr(value, 'match') -+ if (isinstance(value, str) or isinstance(value, Callable) or hasattr(value, 'match') - or isinstance(value, bool) or value is None): - return value - -@@ -1956,7 +1956,7 @@ class SoupStrainer(object): - new_value = [] - for v in value: - if (hasattr(v, '__iter__') and not isinstance(v, bytes) -- and not isinstance(v, unicode)): -+ and not isinstance(v, str)): - # This is almost certainly the user's mistake. In the - # interests of avoiding infinite loops, we'll let - # it through as-is rather than doing a recursive call. -@@ -1968,7 +1968,7 @@ class SoupStrainer(object): - # Otherwise, convert it into a Unicode string. - # The unicode(str()) thing is so this will do the same thing on Python 2 - # and Python 3. -- return unicode(str(value)) -+ return str(str(value)) - - def __str__(self): - """A human-readable representation of this SoupStrainer.""" -@@ -1996,7 +1996,7 @@ class SoupStrainer(object): - markup = markup_name - markup_attrs = markup - -- if isinstance(self.name, basestring): -+ if isinstance(self.name, str): - # Optimization for a very common case where the user is - # searching for a tag with one specific name, and we're - # looking at a tag with a different name. -@@ -2052,7 +2052,7 @@ class SoupStrainer(object): - found = None - # If given a list of items, scan it for a text element that - # matches. -- if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)): -+ if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)): - for element in markup: - if isinstance(element, NavigableString) \ - and self.search(element): -@@ -2065,7 +2065,7 @@ class SoupStrainer(object): - found = self.search_tag(markup) - # If it's text, make sure the text matches. - elif isinstance(markup, NavigableString) or \ -- isinstance(markup, basestring): -+ isinstance(markup, str): - if not self.name and not self.attrs and self._matches(markup, self.text): - found = markup - else: -@@ -2110,7 +2110,7 @@ class SoupStrainer(object): - return not match_against - - if (hasattr(match_against, '__iter__') -- and not isinstance(match_against, basestring)): -+ and not isinstance(match_against, str)): - # We're asked to match against an iterable of items. - # The markup must be match at least one item in the - # iterable. We'll try each one in turn. -@@ -2137,7 +2137,7 @@ class SoupStrainer(object): - # the tag's name and once against its prefixed name. - match = False - -- if not match and isinstance(match_against, unicode): -+ if not match and isinstance(match_against, str): - # Exact string match - match = markup == match_against - -diff --git a/bs4/formatter.py b/bs4/formatter.py -index 9a692ec..2cbab4c 100644 ---- a/bs4/formatter.py -+++ b/bs4/formatter.py -@@ -83,7 +83,7 @@ class Formatter(EntitySubstitution): - """ - if not self.entity_substitution: - return ns -- from element import NavigableString -+ from .element import NavigableString - if (isinstance(ns, NavigableString) - and ns.parent is not None - and ns.parent.name in self.cdata_containing_tags): -diff --git a/bs4/testing.py b/bs4/testing.py -index a2f83a1..9ca507b 100644 ---- a/bs4/testing.py -+++ b/bs4/testing.py -@@ -25,7 +25,7 @@ from bs4.element import ( - from bs4.builder import HTMLParserTreeBuilder - default_builder = HTMLParserTreeBuilder - --BAD_DOCUMENT = u"""A bare string -+BAD_DOCUMENT = """A bare string - - -
-@@ -94,7 +94,7 @@ class SoupTest(unittest.TestCase): - # Verify that every tag that was opened was eventually closed. - - # There are no tags in the open tag counter. -- assert all(v==0 for v in obj.open_tag_counter.values()) -+ assert all(v==0 for v in list(obj.open_tag_counter.values())) - - # The only tag in the tag stack is the one for the root - # document. -@@ -372,7 +372,7 @@ class HTMLTreeBuilderSmokeTest(object): - # process_markup correctly sets processing_instruction_class - # even when the markup is already Unicode and there is no - # need to process anything. -- markup = u"""""" -+ markup = """""" - soup = self.soup(markup) - self.assertEqual(markup, soup.decode()) - -@@ -544,14 +544,14 @@ Hello, world! - # "&T" and "&p" look like incomplete character entities, but they are - # not. - self.assertSoupEquals( -- u"

• AT&T is in the s&p 500

", -- u"

\u2022 AT&T is in the s&p 500

" -+ "

• AT&T is in the s&p 500

", -+ "

\u2022 AT&T is in the s&p 500

" - ) - - def test_apos_entity(self): - self.assertSoupEquals( -- u"

Bob's Bar

", -- u"

Bob's Bar

", -+ "

Bob's Bar

", -+ "

Bob's Bar

", - ) - - def test_entities_in_foreign_document_encoding(self): -@@ -564,17 +564,17 @@ Hello, world! - # characters. - markup = "

“Hello” -☃

" - soup = self.soup(markup) -- self.assertEquals(u"“Hello” -☃", soup.p.string) -+ self.assertEqual("“Hello” -☃", soup.p.string) - - def test_entities_in_attributes_converted_to_unicode(self): -- expect = u'

' -+ expect = '

' - self.assertSoupEquals('

', expect) - self.assertSoupEquals('

', expect) - self.assertSoupEquals('

', expect) - self.assertSoupEquals('

', expect) - - def test_entities_in_text_converted_to_unicode(self): -- expect = u'

pi\N{LATIN SMALL LETTER N WITH TILDE}ata

' -+ expect = '

pi\N{LATIN SMALL LETTER N WITH TILDE}ata

' - self.assertSoupEquals("

piñata

", expect) - self.assertSoupEquals("

piñata

", expect) - self.assertSoupEquals("

piñata

", expect) -@@ -585,7 +585,7 @@ Hello, world! - '

I said "good day!"

') - - def test_out_of_range_entity(self): -- expect = u"\N{REPLACEMENT CHARACTER}" -+ expect = "\N{REPLACEMENT CHARACTER}" - self.assertSoupEquals("�", expect) - self.assertSoupEquals("�", expect) - self.assertSoupEquals("�", expect) -@@ -663,9 +663,9 @@ Hello, world! - # A seemingly innocuous document... but it's in Unicode! And - # it contains characters that can't be represented in the - # encoding found in the declaration! The horror! -- markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' -+ markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' - soup = self.soup(markup) -- self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string) -+ self.assertEqual('Sacr\xe9 bleu!', soup.body.string) - - def test_soupstrainer(self): - """Parsers should be able to work with SoupStrainers.""" -@@ -705,7 +705,7 @@ Hello, world! - # Both XML and HTML entities are converted to Unicode characters - # during parsing. - text = "

<<sacré bleu!>>

" -- expected = u"

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

" -+ expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

" - self.assertSoupEquals(text, expected) - - def test_smart_quotes_converted_on_the_way_in(self): -@@ -715,15 +715,15 @@ Hello, world! - soup = self.soup(quote) - self.assertEqual( - soup.p.string, -- u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") -+ "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") - - def test_non_breaking_spaces_converted_on_the_way_in(self): - soup = self.soup("  ") -- self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2) -+ self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2) - - def test_entities_converted_on_the_way_out(self): - text = "

<<sacré bleu!>>

" -- expected = u"

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

".encode("utf-8") -+ expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

".encode("utf-8") - soup = self.soup(text) - self.assertEqual(soup.p.encode("utf-8"), expected) - -@@ -732,7 +732,7 @@ Hello, world! - # easy-to-understand document. - - # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. -- unicode_html = u'

Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!

' -+ unicode_html = '

Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!

' - - # That's because we're going to encode it into ISO-Latin-1, and use - # that to test. -@@ -848,8 +848,8 @@ Hello, world! - soup = self.soup(markup) - for encoding in PYTHON_SPECIFIC_ENCODINGS: - if encoding in ( -- u'idna', u'mbcs', u'oem', u'undefined', -- u'string_escape', u'string-escape' -+ 'idna', 'mbcs', 'oem', 'undefined', -+ 'string_escape', 'string-escape' - ): - # For one reason or another, these will raise an - # exception if we actually try to use them, so don't -@@ -910,8 +910,8 @@ class XMLTreeBuilderSmokeTest(object): - soup = self.soup(markup) - for encoding in PYTHON_SPECIFIC_ENCODINGS: - if encoding in ( -- u'idna', u'mbcs', u'oem', u'undefined', -- u'string_escape', u'string-escape' -+ 'idna', 'mbcs', 'oem', 'undefined', -+ 'string_escape', 'string-escape' - ): - # For one reason or another, these will raise an - # exception if we actually try to use them, so don't -@@ -962,15 +962,15 @@ class XMLTreeBuilderSmokeTest(object): - self.assertTrue(b"< < hey > >" in encoded) - - def test_can_parse_unicode_document(self): -- markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' -+ markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' - soup = self.soup(markup) -- self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string) -+ self.assertEqual('Sacr\xe9 bleu!', soup.root.string) - - def test_popping_namespaced_tag(self): - markup = 'b2012-07-02T20:33:42Zcd' - soup = self.soup(markup) - self.assertEqual( -- unicode(soup.rss), markup) -+ str(soup.rss), markup) - - def test_docstring_includes_correct_encoding(self): - soup = self.soup("") -@@ -1001,17 +1001,17 @@ class XMLTreeBuilderSmokeTest(object): - def test_closing_namespaced_tag(self): - markup = '

20010504

' - soup = self.soup(markup) -- self.assertEqual(unicode(soup.p), markup) -+ self.assertEqual(str(soup.p), markup) - - def test_namespaced_attributes(self): - markup = '' - soup = self.soup(markup) -- self.assertEqual(unicode(soup.foo), markup) -+ self.assertEqual(str(soup.foo), markup) - - def test_namespaced_attributes_xml_namespace(self): - markup = 'bar' - soup = self.soup(markup) -- self.assertEqual(unicode(soup.foo), markup) -+ self.assertEqual(str(soup.foo), markup) - - def test_find_by_prefixed_name(self): - doc = """ -diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py -index 7b0a6d4..b77659b 100644 ---- a/bs4/tests/test_html5lib.py -+++ b/bs4/tests/test_html5lib.py -@@ -5,7 +5,7 @@ import warnings - try: - from bs4.builder import HTML5TreeBuilder - HTML5LIB_PRESENT = True --except ImportError, e: -+except ImportError as e: - HTML5LIB_PRESENT = False - from bs4.element import SoupStrainer - from bs4.testing import ( -@@ -74,14 +74,14 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): - def test_reparented_markup(self): - markup = '

foo

\n

bar

' - soup = self.soup(markup) -- self.assertEqual(u"

foo

\n

bar

", soup.body.decode()) -+ self.assertEqual("

foo

\n

bar

", soup.body.decode()) - self.assertEqual(2, len(soup.find_all('p'))) - - - def test_reparented_markup_ends_with_whitespace(self): - markup = '

foo

\n

bar

\n' - soup = self.soup(markup) -- self.assertEqual(u"

foo

\n

bar

\n", soup.body.decode()) -+ self.assertEqual("

foo

\n

bar

\n", soup.body.decode()) - self.assertEqual(2, len(soup.find_all('p'))) - - def test_reparented_markup_containing_identical_whitespace_nodes(self): -@@ -127,7 +127,7 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): - def test_foster_parenting(self): - markup = b"""A""" - soup = self.soup(markup) -- self.assertEqual(u"A
", soup.body.decode()) -+ self.assertEqual("A
", soup.body.decode()) - - def test_extraction(self): - """ -diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py -index 7ee91aa..aeff094 100644 ---- a/bs4/tests/test_htmlparser.py -+++ b/bs4/tests/test_htmlparser.py -@@ -60,20 +60,20 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): - # If you don't provide any particular value for - # on_duplicate_attribute, later values replace earlier values. - soup = self.soup(markup) -- self.assertEquals("url3", soup.a['href']) -- self.assertEquals(["cls"], soup.a['class']) -- self.assertEquals("id", soup.a['id']) -+ self.assertEqual("url3", soup.a['href']) -+ self.assertEqual(["cls"], soup.a['class']) -+ self.assertEqual("id", soup.a['id']) - - # You can also get this behavior explicitly. - def assert_attribute(on_duplicate_attribute, expected): - soup = self.soup( - markup, on_duplicate_attribute=on_duplicate_attribute - ) -- self.assertEquals(expected, soup.a['href']) -+ self.assertEqual(expected, soup.a['href']) - - # Verify that non-duplicate attributes are treated normally. -- self.assertEquals(["cls"], soup.a['class']) -- self.assertEquals("id", soup.a['id']) -+ self.assertEqual(["cls"], soup.a['class']) -+ self.assertEqual("id", soup.a['id']) - assert_attribute(None, "url3") - assert_attribute(BeautifulSoupHTMLParser.REPLACE, "url3") - -diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py -index f96e4ae..3d0c75f 100644 ---- a/bs4/tests/test_lxml.py -+++ b/bs4/tests/test_lxml.py -@@ -7,7 +7,7 @@ try: - import lxml.etree - LXML_PRESENT = True - LXML_VERSION = lxml.etree.LXML_VERSION --except ImportError, e: -+except ImportError as e: - LXML_PRESENT = False - LXML_VERSION = (0,) - -@@ -68,7 +68,7 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): - # if one is installed. - with warnings.catch_warnings(record=True) as w: - soup = BeautifulStoneSoup("") -- self.assertEqual(u"", unicode(soup.b)) -+ self.assertEqual("", str(soup.b)) - self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message)) - - def test_tracking_line_numbers(self): -diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py -index 857eb41..e1035ea 100644 ---- a/bs4/tests/test_soup.py -+++ b/bs4/tests/test_soup.py -@@ -51,17 +51,17 @@ PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2)) - class TestConstructor(SoupTest): - - def test_short_unicode_input(self): -- data = u"

éé

" -+ data = "

éé

" - soup = self.soup(data) -- self.assertEqual(u"éé", soup.h1.string) -+ self.assertEqual("éé", soup.h1.string) - - def test_embedded_null(self): -- data = u"

foo\0bar

" -+ data = "

foo\0bar

" - soup = self.soup(data) -- self.assertEqual(u"foo\0bar", soup.h1.string) -+ self.assertEqual("foo\0bar", soup.h1.string) - - def test_exclude_encodings(self): -- utf8_data = u"Räksmörgås".encode("utf-8") -+ utf8_data = "Räksmörgås".encode("utf-8") - soup = self.soup(utf8_data, exclude_encodings=["utf-8"]) - self.assertEqual("windows-1252", soup.original_encoding) - -@@ -127,7 +127,7 @@ class TestConstructor(SoupTest): - yield markup, None, None, False - - import re -- self.assertRaisesRegexp( -+ self.assertRaisesRegex( - ParserRejectedMarkup, - "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.", - BeautifulSoup, '', builder=Mock, -@@ -303,7 +303,7 @@ class TestWarnings(SoupTest): - with warnings.catch_warnings(record=True) as warning_list: - # note - this url must differ from the bytes one otherwise - # python's warnings system swallows the second warning -- soup = self.soup(u"http://www.crummyunicode.com/") -+ soup = self.soup("http://www.crummyunicode.com/") - warning = self._assert_warning( - warning_list, MarkupResemblesLocatorWarning - ) -@@ -319,7 +319,7 @@ class TestWarnings(SoupTest): - - def test_url_warning_with_unicode_and_space(self): - with warnings.catch_warnings(record=True) as warning_list: -- soup = self.soup(u"http://www.crummyuncode.com/ is great") -+ soup = self.soup("http://www.crummyuncode.com/ is great") - self.assertFalse(any("looks like a URL" in str(w.message) - for w in warning_list)) - -@@ -341,9 +341,9 @@ class TestEntitySubstitution(unittest.TestCase): - def test_simple_html_substitution(self): - # Unicode characters corresponding to named HTML entites - # are substituted, and no others. -- s = u"foo\u2200\N{SNOWMAN}\u00f5bar" -+ s = "foo\u2200\N{SNOWMAN}\u00f5bar" - self.assertEqual(self.sub.substitute_html(s), -- u"foo∀\N{SNOWMAN}õbar") -+ "foo∀\N{SNOWMAN}õbar") - - def test_smart_quote_substitution(self): - # MS smart quotes are a common source of frustration, so we -@@ -408,7 +408,7 @@ class TestEncodingConversion(SoupTest): - - def setUp(self): - super(TestEncodingConversion, self).setUp() -- self.unicode_data = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' -+ self.unicode_data = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' - self.utf8_data = self.unicode_data.encode("utf-8") - # Just so you know what it looks like. - self.assertEqual( -@@ -428,7 +428,7 @@ class TestEncodingConversion(SoupTest): - ascii = b"a" - soup_from_ascii = self.soup(ascii) - unicode_output = soup_from_ascii.decode() -- self.assertTrue(isinstance(unicode_output, unicode)) -+ self.assertTrue(isinstance(unicode_output, str)) - self.assertEqual(unicode_output, self.document_for(ascii.decode())) - self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8") - finally: -@@ -440,7 +440,7 @@ class TestEncodingConversion(SoupTest): - # is not set. - soup_from_unicode = self.soup(self.unicode_data) - self.assertEqual(soup_from_unicode.decode(), self.unicode_data) -- self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!') -+ self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!') - self.assertEqual(soup_from_unicode.original_encoding, None) - - def test_utf8_in_unicode_out(self): -@@ -448,7 +448,7 @@ class TestEncodingConversion(SoupTest): - # attribute is set. - soup_from_utf8 = self.soup(self.utf8_data) - self.assertEqual(soup_from_utf8.decode(), self.unicode_data) -- self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!') -+ self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!') - - def test_utf8_out(self): - # The internal data structures can be encoded as UTF-8. -@@ -459,7 +459,7 @@ class TestEncodingConversion(SoupTest): - PYTHON_3_PRE_3_2, - "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.") - def test_attribute_name_containing_unicode_characters(self): -- markup = u'
' -+ markup = '
' - self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8")) - - class TestUnicodeDammit(unittest.TestCase): -@@ -526,7 +526,7 @@ class TestUnicodeDammit(unittest.TestCase): - - def test_exclude_encodings(self): - # This is UTF-8. -- utf8_data = u"Räksmörgås".encode("utf-8") -+ utf8_data = "Räksmörgås".encode("utf-8") - - # But if we exclude UTF-8 from consideration, the guess is - # Windows-1252. -diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py -index 2246346..b4f2a86 100644 ---- a/bs4/tests/test_tree.py -+++ b/bs4/tests/test_tree.py -@@ -75,13 +75,13 @@ class TestFind(TreeTest): - self.assertEqual(soup.find("b").string, "2") - - def test_unicode_text_find(self): -- soup = self.soup(u'

Räksmörgås

') -- self.assertEqual(soup.find(string=u'Räksmörgås'), u'Räksmörgås') -+ soup = self.soup('

Räksmörgås

') -+ self.assertEqual(soup.find(string='Räksmörgås'), 'Räksmörgås') - - def test_unicode_attribute_find(self): -- soup = self.soup(u'

here it is

') -+ soup = self.soup('

here it is

') - str(soup) -- self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text) -+ self.assertEqual("here it is", soup.find(id='Räksmörgås').text) - - - def test_find_everything(self): -@@ -101,17 +101,17 @@ class TestFindAll(TreeTest): - """You can search the tree for text nodes.""" - soup = self.soup("Foobar\xbb") - # Exact match. -- self.assertEqual(soup.find_all(string="bar"), [u"bar"]) -- self.assertEqual(soup.find_all(text="bar"), [u"bar"]) -+ self.assertEqual(soup.find_all(string="bar"), ["bar"]) -+ self.assertEqual(soup.find_all(text="bar"), ["bar"]) - # Match any of a number of strings. - self.assertEqual( -- soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"]) -+ soup.find_all(text=["Foo", "bar"]), ["Foo", "bar"]) - # Match a regular expression. - self.assertEqual(soup.find_all(text=re.compile('.*')), -- [u"Foo", u"bar", u'\xbb']) -+ ["Foo", "bar", '\xbb']) - # Match anything. - self.assertEqual(soup.find_all(text=True), -- [u"Foo", u"bar", u'\xbb']) -+ ["Foo", "bar", '\xbb']) - - def test_find_all_limit(self): - """You can limit the number of items returned by find_all.""" -@@ -254,8 +254,8 @@ class TestFindAllByAttribute(TreeTest): - ["Matching a.", "Matching b."]) - - def test_find_all_by_utf8_attribute_value(self): -- peace = u"םולש".encode("utf8") -- data = u''.encode("utf8") -+ peace = "םולש".encode("utf8") -+ data = ''.encode("utf8") - soup = self.soup(data) - self.assertEqual([soup.a], soup.find_all(title=peace)) - self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8"))) -@@ -444,7 +444,7 @@ class TestSmooth(TreeTest): - # output. - - # Since the tag has two children, its .string is None. -- self.assertEquals(None, div.span.string) -+ self.assertEqual(None, div.span.string) - - self.assertEqual(7, len(div.contents)) - div.smooth() -@@ -755,18 +755,18 @@ class TestTag(SoupTest): - - # No list of whitespace-preserving tags -> pretty-print - tag._preserve_whitespace_tags = None -- self.assertEquals(True, tag._should_pretty_print(0)) -+ self.assertEqual(True, tag._should_pretty_print(0)) - - # List exists but tag is not on the list -> pretty-print - tag.preserve_whitespace_tags = ["some_other_tag"] -- self.assertEquals(True, tag._should_pretty_print(1)) -+ self.assertEqual(True, tag._should_pretty_print(1)) - - # Indent level is None -> don't pretty-print -- self.assertEquals(False, tag._should_pretty_print(None)) -+ self.assertEqual(False, tag._should_pretty_print(None)) - - # Tag is on the whitespace-preserving list -> don't pretty-print - tag.preserve_whitespace_tags = ["some_other_tag", "a_tag"] -- self.assertEquals(False, tag._should_pretty_print(1)) -+ self.assertEqual(False, tag._should_pretty_print(1)) - - - class TestTagCreation(SoupTest): -@@ -905,10 +905,10 @@ class TestTreeModification(SoupTest): - assert not isinstance(i, BeautifulSoup) - - p1, p2, p3, p4 = list(soup.children) -- self.assertEquals("And now, a word:", p1.string) -- self.assertEquals("p2", p2.string) -- self.assertEquals("p3", p3.string) -- self.assertEquals("And we're back.", p4.string) -+ self.assertEqual("And now, a word:", p1.string) -+ self.assertEqual("p2", p2.string) -+ self.assertEqual("p3", p3.string) -+ self.assertEqual("And we're back.", p4.string) - - - def test_replace_with_maintains_next_element_throughout(self): -@@ -1015,8 +1015,8 @@ class TestTreeModification(SoupTest): - d1 = soup.find('div', id='d1') - d2 = soup.find('div', id='d2') - d2.extend(d1) -- self.assertEqual(u'
', d1.decode()) -- self.assertEqual(u'', d2.decode()) -+ self.assertEqual('
', d1.decode()) -+ self.assertEqual('', d2.decode()) - - def test_move_tag_to_beginning_of_parent(self): - data = "" -@@ -1262,7 +1262,7 @@ class TestTreeModification(SoupTest): - - """) - [soup.script.extract() for i in soup.find_all("script")] -- self.assertEqual("\n\n\n", unicode(soup.body)) -+ self.assertEqual("\n\n\n", str(soup.body)) - - - def test_extract_works_when_element_is_surrounded_by_identical_strings(self): -@@ -1524,7 +1524,7 @@ class TestPersistence(SoupTest): - soup = BeautifulSoup(b'

 

', 'html.parser') - encoding = soup.original_encoding - copy = soup.__copy__() -- self.assertEqual(u"

 

", unicode(copy)) -+ self.assertEqual("

 

", str(copy)) - self.assertEqual(encoding, copy.original_encoding) - - def test_copy_preserves_builder_information(self): -@@ -1554,14 +1554,14 @@ class TestPersistence(SoupTest): - - def test_unicode_pickle(self): - # A tree containing Unicode characters can be pickled. -- html = u"\N{SNOWMAN}" -+ html = "\N{SNOWMAN}" - soup = self.soup(html) - dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) - loaded = pickle.loads(dumped) - self.assertEqual(loaded.decode(), soup.decode()) - - def test_copy_navigablestring_is_not_attached_to_tree(self): -- html = u"FooBar" -+ html = "FooBar" - soup = self.soup(html) - s1 = soup.find(string="Foo") - s2 = copy.copy(s1) -@@ -1573,7 +1573,7 @@ class TestPersistence(SoupTest): - self.assertEqual(None, s2.previous_element) - - def test_copy_navigablestring_subclass_has_same_type(self): -- html = u"" -+ html = "" - soup = self.soup(html) - s1 = soup.string - s2 = copy.copy(s1) -@@ -1581,19 +1581,19 @@ class TestPersistence(SoupTest): - self.assertTrue(isinstance(s2, Comment)) - - def test_copy_entire_soup(self): -- html = u"
FooBar
end" -+ html = "
FooBar
end" - soup = self.soup(html) - soup_copy = copy.copy(soup) - self.assertEqual(soup, soup_copy) - - def test_copy_tag_copies_contents(self): -- html = u"
FooBar
end" -+ html = "
FooBar
end" - soup = self.soup(html) - div = soup.div - div_copy = copy.copy(div) - - # The two tags look the same, and evaluate to equal. -- self.assertEqual(unicode(div), unicode(div_copy)) -+ self.assertEqual(str(div), str(div_copy)) - self.assertEqual(div, div_copy) - - # But they're not the same object. -@@ -1609,17 +1609,17 @@ class TestPersistence(SoupTest): - class TestSubstitutions(SoupTest): - - def test_default_formatter_is_minimal(self): -- markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" -+ markup = "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="minimal") - # The < is converted back into < but the e-with-acute is left alone. - self.assertEqual( - decoded, - self.document_for( -- u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) -+ "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) - - def test_formatter_html(self): -- markup = u"
<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" -+ markup = "
<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="html") - self.assertEqual( -@@ -1627,7 +1627,7 @@ class TestSubstitutions(SoupTest): - self.document_for("
<<Sacré bleu!>>")) - - def test_formatter_html5(self): -- markup = u"
<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" -+ markup = "
<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="html5") - self.assertEqual( -@@ -1635,49 +1635,49 @@ class TestSubstitutions(SoupTest): - self.document_for("
<<Sacré bleu!>>")) - - def test_formatter_minimal(self): -- markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" -+ markup = "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="minimal") - # The < is converted back into < but the e-with-acute is left alone. - self.assertEqual( - decoded, - self.document_for( -- u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) -+ "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) - - def test_formatter_null(self): -- markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" -+ markup = "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter=None) - # Neither the angle brackets nor the e-with-acute are converted. - # This is not valid HTML, but it's what the user wanted. - self.assertEqual(decoded, -- self.document_for(u"<>")) -+ self.document_for("<>")) - - def test_formatter_custom(self): -- markup = u"<foo>bar
" -+ markup = "<foo>bar
" - soup = self.soup(markup) - decoded = soup.decode(formatter = lambda x: x.upper()) - # Instead of normal entity conversion code, the custom - # callable is called on every string. - self.assertEqual( - decoded, -- self.document_for(u"BAR
")) -+ self.document_for("BAR
")) - - def test_formatter_is_run_on_attribute_values(self): -- markup = u'e' -+ markup = 'e' - soup = self.soup(markup) - a = soup.a - -- expect_minimal = u'e' -+ expect_minimal = 'e' - - self.assertEqual(expect_minimal, a.decode()) - self.assertEqual(expect_minimal, a.decode(formatter="minimal")) - -- expect_html = u'e' -+ expect_html = 'e' - self.assertEqual(expect_html, a.decode(formatter="html")) - - self.assertEqual(markup, a.decode(formatter=None)) -- expect_upper = u'E' -+ expect_upper = 'E' - self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper())) - - def test_formatter_skips_script_tag_for_html_documents(self): -@@ -1703,7 +1703,7 @@ class TestSubstitutions(SoupTest): - # Everything outside the
 tag is reformatted, but everything
-         # inside is left alone.
-         self.assertEqual(
--            u'
\n foo\n
  \tbar\n  \n  
\n baz\n \n
', -+ '
\n foo\n
  \tbar\n  \n  
\n baz\n \n
', - soup.div.prettify()) - - def test_prettify_accepts_formatter_function(self): -@@ -1713,14 +1713,14 @@ class TestSubstitutions(SoupTest): - - def test_prettify_outputs_unicode_by_default(self): - soup = self.soup("") -- self.assertEqual(unicode, type(soup.prettify())) -+ self.assertEqual(str, type(soup.prettify())) - - def test_prettify_can_encode_data(self): - soup = self.soup("") - self.assertEqual(bytes, type(soup.prettify("utf-8"))) - - def test_html_entity_substitution_off_by_default(self): -- markup = u"Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!" -+ markup = "Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!" - soup = self.soup(markup) - encoded = soup.b.encode("utf-8") - self.assertEqual(encoded, markup.encode('utf-8')) -@@ -1764,48 +1764,48 @@ class TestEncoding(SoupTest): - """Test the ability to encode objects into strings.""" - - def test_unicode_string_can_be_encoded(self): -- html = u"\N{SNOWMAN}" -+ html = "\N{SNOWMAN}" - soup = self.soup(html) - self.assertEqual(soup.b.string.encode("utf-8"), -- u"\N{SNOWMAN}".encode("utf-8")) -+ "\N{SNOWMAN}".encode("utf-8")) - - def test_tag_containing_unicode_string_can_be_encoded(self): -- html = u"\N{SNOWMAN}" -+ html = "\N{SNOWMAN}" - soup = self.soup(html) - self.assertEqual( - soup.b.encode("utf-8"), html.encode("utf-8")) - - def test_encoding_substitutes_unrecognized_characters_by_default(self): -- html = u"\N{SNOWMAN}" -+ html = "\N{SNOWMAN}" - soup = self.soup(html) - self.assertEqual(soup.b.encode("ascii"), b"") - - def test_encoding_can_be_made_strict(self): -- html = u"\N{SNOWMAN}" -+ html = "\N{SNOWMAN}" - soup = self.soup(html) - self.assertRaises( - UnicodeEncodeError, soup.encode, "ascii", errors="strict") - - def test_decode_contents(self): -- html = u"\N{SNOWMAN}" -+ html = "\N{SNOWMAN}" - soup = self.soup(html) -- self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents()) -+ self.assertEqual("\N{SNOWMAN}", soup.b.decode_contents()) - - def test_encode_contents(self): -- html = u"\N{SNOWMAN}" -+ html = "\N{SNOWMAN}" - soup = self.soup(html) - self.assertEqual( -- u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents( -+ "\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents( - encoding="utf8")) - - def test_deprecated_renderContents(self): -- html = u"\N{SNOWMAN}" -+ html = "\N{SNOWMAN}" - soup = self.soup(html) - self.assertEqual( -- u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents()) -+ "\N{SNOWMAN}".encode("utf8"), soup.b.renderContents()) - - def test_repr(self): -- html = u"\N{SNOWMAN}" -+ html = "\N{SNOWMAN}" - soup = self.soup(html) - if PY3K: - self.assertEqual(html, repr(soup)) -@@ -1993,7 +1993,7 @@ class TestSoupSelector(TreeTest): - els = self.soup.select('title') - self.assertEqual(len(els), 1) - self.assertEqual(els[0].name, 'title') -- self.assertEqual(els[0].contents, [u'The title']) -+ self.assertEqual(els[0].contents, ['The title']) - - def test_one_tag_many(self): - els = self.soup.select('div') -@@ -2039,7 +2039,7 @@ class TestSoupSelector(TreeTest): - self.assertEqual(dashed[0]['id'], 'dash2') - - def test_dashed_tag_text(self): -- self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, u'Hello there.') -+ self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, 'Hello there.') - - def test_select_dashed_matches_find_all(self): - self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag')) -@@ -2225,12 +2225,12 @@ class TestSoupSelector(TreeTest): - # Try to select first paragraph - els = self.soup.select('div#inner p:nth-of-type(1)') - self.assertEqual(len(els), 1) -- self.assertEqual(els[0].string, u'Some text') -+ self.assertEqual(els[0].string, 'Some text') - - # Try to select third paragraph - els = self.soup.select('div#inner p:nth-of-type(3)') - self.assertEqual(len(els), 1) -- self.assertEqual(els[0].string, u'Another') -+ self.assertEqual(els[0].string, 'Another') - - # Try to select (non-existent!) fourth paragraph - els = self.soup.select('div#inner p:nth-of-type(4)') -@@ -2243,7 +2243,7 @@ class TestSoupSelector(TreeTest): - def test_nth_of_type_direct_descendant(self): - els = self.soup.select('div#inner > p:nth-of-type(1)') - self.assertEqual(len(els), 1) -- self.assertEqual(els[0].string, u'Some text') -+ self.assertEqual(els[0].string, 'Some text') - - def test_id_child_selector_nth_of_type(self): - self.assertSelects('#inner > p:nth-of-type(2)', ['p1']) -@@ -2324,7 +2324,7 @@ class TestSoupSelector(TreeTest): - markup = '
' - soup = BeautifulSoup(markup, 'html.parser') - selected = soup.select(".c1, .c2") -- self.assertEquals(3, len(selected)) -+ self.assertEqual(3, len(selected)) - - # Verify that find_all finds the same elements, though because - # of an implementation detail it finds them in a different -diff --git a/convert-py3k b/convert-py3k -deleted file mode 100755 -index 05fab53..0000000 ---- a/convert-py3k -+++ /dev/null -@@ -1,16 +0,0 @@ --#!/bin/sh --# --# The Python 2 source is the definitive source. This script uses 2to3-3.2 to --# create a new python3/bs4 source tree that works under Python 3. --# --# See README.txt to see how to run the test suite after conversion. --echo "About to destroy and rebuild the py3k/bs4 directory." --echo "If you've got stuff in there, Ctrl-C out of this script or answer 'n'." --mkdir -p py3k --rm -rfI py3k/bs4 --cp -r bs4/ py3k/ --2to3 -w py3k --echo "" --echo "OK, conversion is done." --echo "Now running the unit tests." --(cd py3k && python3 -m unittest discover -s bs4) -\ No newline at end of file -diff --git a/doc/source/index.rst b/doc/source/index.rst -index 34ec7cf..b8ca011 100644 ---- a/doc/source/index.rst -+++ b/doc/source/index.rst -@@ -170,16 +170,13 @@ Installing Beautiful Soup - If you're using a recent version of Debian or Ubuntu Linux, you can - install Beautiful Soup with the system package manager: - --:kbd:`$ apt-get install python-bs4` (for Python 2) -- --:kbd:`$ apt-get install python3-bs4` (for Python 3) -+:kbd:`$ apt-get install python3-bs4` - - Beautiful Soup 4 is published through PyPi, so if you can't install it - with the system packager, you can install it with ``easy_install`` or --``pip``. The package name is ``beautifulsoup4``, and the same package --works on Python 2 and Python 3. Make sure you use the right version of --``pip`` or ``easy_install`` for your Python version (these may be named --``pip3`` and ``easy_install3`` respectively if you're using Python 3). -+``pip``. The package name is ``beautifulsoup4``. Make sure you use the -+right version of ``pip`` or ``easy_install`` for your Python version -+(these may be named ``pip3`` and ``easy_install3`` respectively). - - :kbd:`$ easy_install beautifulsoup4` - -@@ -202,40 +199,8 @@ package the entire library with your application. You can download the - tarball, copy its ``bs4`` directory into your application's codebase, - and use Beautiful Soup without installing it at all. - --I use Python 2.7 and Python 3.8 to develop Beautiful Soup, but it --should work with other recent versions. -- --Problems after installation ----------------------------- -- --Beautiful Soup is packaged as Python 2 code. When you install it for --use with Python 3, it's automatically converted to Python 3 code. If --you don't install the package, the code won't be converted. There have --also been reports on Windows machines of the wrong version being --installed. -- --If you get the ``ImportError`` "No module named HTMLParser", your --problem is that you're running the Python 2 version of the code under --Python 3. -- --If you get the ``ImportError`` "No module named html.parser", your --problem is that you're running the Python 3 version of the code under --Python 2. -- --In both cases, your best bet is to completely remove the Beautiful --Soup installation from your system (including any directory created --when you unzipped the tarball) and try the installation again. -- --If you get the ``SyntaxError`` "Invalid syntax" on the line --``ROOT_TAG_NAME = u'[document]'``, you need to convert the Python 2 --code to Python 3. You can do this either by installing the package: -- --:kbd:`$ python3 setup.py install` -- --or by manually running Python's ``2to3`` conversion script on the --``bs4`` directory: -- --:kbd:`$ 2to3-3.2 -w bs4` -+I use Python 3.8 to develop Beautiful Soup, but it should work with -+other recent versions. - - .. _parser-installation: - -@@ -272,8 +237,7 @@ This table summarizes the advantages and disadvantages of each parser library: - +----------------------+--------------------------------------------+--------------------------------+--------------------------+ - | Python's html.parser | ``BeautifulSoup(markup, "html.parser")`` | * Batteries included | * Not as fast as lxml, | - | | | * Decent speed | less lenient than | --| | | * Lenient (As of Python 2.7.3 | html5lib. | --| | | and 3.2.) | | -+| | | * Lenient (As of Python 3.2) | html5lib. | - +----------------------+--------------------------------------------+--------------------------------+--------------------------+ - | lxml's HTML parser | ``BeautifulSoup(markup, "lxml")`` | * Very fast | * External C dependency | - | | | * Lenient | | -@@ -289,9 +253,9 @@ This table summarizes the advantages and disadvantages of each parser library: - +----------------------+--------------------------------------------+--------------------------------+--------------------------+ - - If you can, I recommend you install and use lxml for speed. If you're --using a very old version of Python -- earlier than 2.7.3 or 3.2.2 -- --it's `essential` that you install lxml or html5lib. Python's built-in --HTML parser is just not very good in those old versions. -+using a very old version of Python -- earlier than 3.2.2 -- it's -+`essential` that you install lxml or html5lib. Python's built-in HTML -+parser is just not very good in those old versions. - - Note that if a document is invalid, different parsers will generate - different Beautiful Soup trees for it. See `Differences -@@ -481,8 +445,7 @@ uses the ``NavigableString`` class to contain these bits of text:: - A ``NavigableString`` is just like a Python Unicode string, except - that it also supports some of the features described in `Navigating - the tree`_ and `Searching the tree`_. You can convert a --``NavigableString`` to a Unicode string with ``unicode()`` (in --Python 2) or ``str`` (in Python 3):: -+``NavigableString`` to a Unicode string with ``str``:: - - unicode_string = str(tag.string) - unicode_string -@@ -2230,8 +2193,7 @@ Non-pretty printing - ------------------- - - If you just want a string, with no fancy formatting, you can call --``str()`` on a ``BeautifulSoup`` object (``unicode()`` in Python 2), --or on a ``Tag`` within it:: -+``str()`` on a ``BeautifulSoup`` object, or on a ``Tag`` within it:: - - str(soup) - # 'I linked to example.com' -@@ -3139,10 +3101,10 @@ Version mismatch problems - ------------------------- - - * ``SyntaxError: Invalid syntax`` (on the line ``ROOT_TAG_NAME = -- '[document]'``): Caused by running the Python 2 version of -+ '[document]'``): Caused by running an old Python 2 version of - Beautiful Soup under Python 3, without converting the code. - --* ``ImportError: No module named HTMLParser`` - Caused by running the -+* ``ImportError: No module named HTMLParser`` - Caused by running an old - Python 2 version of Beautiful Soup under Python 3. - - * ``ImportError: No module named html.parser`` - Caused by running the -diff --git a/setup.py b/setup.py -index 7b4b393..b9b4ed2 100644 ---- a/setup.py -+++ b/setup.py -@@ -4,23 +4,22 @@ from setuptools import ( - ) - import sys - -+from bs4 import __version__ -+ - with open("README.md", "r") as fh: - long_description = fh.read() - - setup( - name="beautifulsoup4", -- # NOTE: We can't import __version__ from bs4 because bs4/__init__.py is Python 2 code, -- # and converting it to Python 3 means going through this code to run 2to3. -- # So we have to specify it twice for the time being. -- version = '4.9.3', -+ version = __version__, - author="Leonard Richardson", - author_email='leonardr@segfault.org', - url="http://www.crummy.com/software/BeautifulSoup/bs4/", - download_url = "http://www.crummy.com/software/BeautifulSoup/bs4/download/", - description="Screen-scraping library", -+ python_requires='>3.0.0', - install_requires=[ -- "soupsieve >1.2; python_version>='3.0'", -- "soupsieve >1.2, <2.0; python_version<'3.0'", -+ "soupsieve >1.2", - ], - long_description=long_description, - long_description_content_type="text/markdown", -@@ -30,12 +29,10 @@ setup( - 'lxml' : [ 'lxml'], - 'html5lib' : ['html5lib'], - }, -- use_2to3 = True, - classifiers=["Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python", -- "Programming Language :: Python :: 2.7", - 'Programming Language :: Python :: 3', - "Topic :: Text Processing :: Markup :: HTML", - "Topic :: Text Processing :: Markup :: XML", -diff --git a/test-all-versions b/test-all-versions -index 01e436b..fe7758a 100755 ---- a/test-all-versions -+++ b/test-all-versions -@@ -1 +1 @@ --python2.7 -m unittest discover -s bs4 && ./convert-py3k -+python3 -m unittest discover -s bs4 --- -2.13.7 - diff --git a/beautifulsoup4-4.10.0.tar.gz b/beautifulsoup4-4.10.0.tar.gz new file mode 100644 index 0000000..308d283 Binary files /dev/null and b/beautifulsoup4-4.10.0.tar.gz differ diff --git a/beautifulsoup4-4.9.3.tar.gz b/beautifulsoup4-4.9.3.tar.gz deleted file mode 100644 index 7ef59d3..0000000 Binary files a/beautifulsoup4-4.9.3.tar.gz and /dev/null differ diff --git a/python-beautifulsoup4.spec b/python-beautifulsoup4.spec index 21656d9..3fb10fd 100644 --- a/python-beautifulsoup4.spec +++ b/python-beautifulsoup4.spec @@ -1,50 +1,98 @@ +%global _empty_manifest_terminate_build 0 Name: python-beautifulsoup4 -Version: 4.9.3 -Release: 2 -Summary: HTML/XML parser for quick-turnaround projects +Version: 4.10.0 +Release: 1 +Summary: Screen-scraping library License: MIT -URL: http://www.crummy.com/software/BeautifulSoup/ -Source0: https://files.pythonhosted.org/packages/source/b/beautifulsoup4/beautifulsoup4-%{version}.tar.gz +URL: http://www.crummy.com/software/BeautifulSoup/bs4/ +Source0: https://files.pythonhosted.org/packages/a1/69/daeee6d8f22c997e522cdbeb59641c4d31ab120aba0f2c799500f7456b7e/beautifulsoup4-4.10.0.tar.gz BuildArch: noarch -BuildRequires: python3-devel python3-html5lib python3-setuptools python3-lxml -BuildRequires: python3-soupsieve - -Patch6000: backport-converts-the-code-base-to-Python-3-and-removes-the-u.patch - -%global _description\ -This package provides a python library which is designed for quick\ -turnaround projects.It provides methods for navigating and modifying\ -a parse tree.It can help convert incoming documents to Unicode\ +%description +This package provides a python library which is designed for quick +turnaround projects.It provides methods for navigating and modifying +a parse tree.It can help convert incoming documents to Unicode and outgoing documents to utf-8. -%description %_description - -%package -n python3-beautifulsoup4 -Summary: %summary -%description -n python3-beautifulsoup4 %_description -Obsoletes: python3-BeautifulSoup < 1:3.2.1-2 +%package -n python3-beautifulsoup4 +Summary: Screen-scraping library +Provides: python-beautifulsoup4 +# Base build requires +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pbr +BuildRequires: python3-pip +BuildRequires: python3-wheel +# General requires +BuildRequires: python3-soupsieve +BuildRequires: python3-html5lib +BuildRequires: python3-lxml +# General requires +Requires: python3-soupsieve +Requires: python3-html5lib Requires: python3-lxml +%description -n python3-beautifulsoup4 +This package provides a python library which is designed for quick +turnaround projects.It provides methods for navigating and modifying +a parse tree.It can help convert incoming documents to Unicode +and outgoing documents to utf-8. + +%package help +Summary: Screen-scraping library +Provides: python3-beautifulsoup4-doc +%description help +This package provides a python library which is designed for quick +turnaround projects.It provides methods for navigating and modifying +a parse tree.It can help convert incoming documents to Unicode +and outgoing documents to utf-8. %prep -%setup -q -n beautifulsoup4-%{version} -%patch6000 -p1 +%autosetup -n beautifulsoup4-%{version} %build -%{py3_build} +%py3_build %install -%{py3_install} +%py3_install + +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . %check -%{__python3} -m unittest discover -s bs4 || : +%{__python3} setup.py test -%files -n python3-beautifulsoup4 -%license COPYING.txt -%doc NEWS.txt TODO.txt -%{python3_sitelib}/beautifulsoup4-%{version}*.egg-info -%{python3_sitelib}/bs4 +%files -n python3-beautifulsoup4 -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* %changelog +* Tue May 24 2022 renliang - 4.10.0-1 +- Upgrade package python3-beautifulsoup4 to version 4.10.0 + * Mon Jan 10 2022 shixuantong - 4.9.3-2 - converts the code base to Python 3, and removes the use_2to3 reference in setup.py.