From 91827993fb8bece431f1d8831468aa43b6f3dfef Mon Sep 17 00:00:00 2001 From: shixuantong Date: Mon, 10 Jan 2022 17:58:01 +0800 Subject: [PATCH] converts the code base to Python 3, and removes the use_2to3 reference in setup.py. --- ...e-base-to-Python-3-and-removes-the-u.patch | 2062 +++++++++++++++++ python-beautifulsoup4.spec | 14 +- 2 files changed, 2070 insertions(+), 6 deletions(-) create mode 100644 backport-converts-the-code-base-to-Python-3-and-removes-the-u.patch diff --git a/backport-converts-the-code-base-to-Python-3-and-removes-the-u.patch b/backport-converts-the-code-base-to-Python-3-and-removes-the-u.patch new file mode 100644 index 0000000..1cab095 --- /dev/null +++ b/backport-converts-the-code-base-to-Python-3-and-removes-the-u.patch @@ -0,0 +1,2062 @@ +diff --git a/README.md b/README.md +index 92dd339..884f9eb 100644 +--- a/README.md ++++ b/README.md +@@ -53,17 +53,11 @@ To go beyond the basics, [comprehensive documentation is available](http://www.c + + # Note on Python 2 sunsetting + +-Since 2012, Beautiful Soup has been developed as a Python 2 library +-which is automatically converted to Python 3 code as necessary. This +-makes it impossible to take advantage of some features of Python +-3. +- +-For this reason, I plan to discontinue Beautiful Soup's Python 2 +-support at some point after December 31, 2020: one year after the +-sunset date for Python 2 itself. Beyond that point, new Beautiful Soup +-development will exclusively target Python 3. Of course, older +-releases of Beautiful Soup, which support both versions, will continue +-to be available. ++Beautiful Soup's support for Python 2 was discontinued on December 31, ++2020: one year after the sunset date for Python 2 itself. From this ++point onward, new Beautiful Soup development will exclusively target ++Python 3. The final release of Beautiful Soup 4 to support Python 2 ++was 4.9.3. + + # Supporting the project + +@@ -93,10 +87,5 @@ $ nosetests + ``` + + ``` +-$ python -m unittest discover -s bs4 ++$ python3 -m unittest discover -s bs4 + ``` +- +-If you checked out the source tree, you should see a script in the +-home directory called test-all-versions. This script will run the unit +-tests under Python 2, then create a temporary Python 3 conversion of +-the source and run the unit tests again under Python 3. +diff --git a/bs4/__init__.py b/bs4/__init__.py +index 8f78809..51ccc21 100644 +--- a/bs4/__init__.py ++++ b/bs4/__init__.py +@@ -7,7 +7,7 @@ Beautiful Soup uses a pluggable XML or HTML parser to parse a + provides methods and Pythonic idioms that make it easy to navigate, + search, and modify the parse tree. + +-Beautiful Soup works with Python 2.7 and up. It works better if lxml ++Beautiful Soup works with Python 3.5 and up. It works better if lxml + and/or html5lib is installed. + + For more than you ever wanted to know about Beautiful Soup, see the +@@ -29,6 +29,11 @@ import sys + import traceback + import warnings + ++# The very first thing we do is give a useful error if someone is ++# running this code under Python 2. ++if sys.version_info.major < 3: ++ raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. 
The final version of Beautiful Soup to support Python 2 was 4.9.3.') ++ + from .builder import builder_registry, ParserRejectedMarkup + from .dammit import UnicodeDammit + from .element import ( +@@ -49,10 +54,6 @@ from .element import ( + TemplateString, + ) + +-# The very first thing we do is give a useful error if someone is +-# running this code under Python 3 without converting it. +-'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' +- + # Define some custom warnings. + class GuessedAtParserWarning(UserWarning): + """The warning issued when BeautifulSoup has to guess what parser to +@@ -100,7 +101,7 @@ class BeautifulSoup(Tag): + # Since BeautifulSoup subclasses Tag, it's possible to treat it as + # a Tag with a .name. This name makes it clear the BeautifulSoup + # object isn't a real markup tag. +- ROOT_TAG_NAME = u'[document]' ++ ROOT_TAG_NAME = '[document]' + + # If the end-user gives no indication which tree builder they + # want, look for one with these features. +@@ -217,7 +218,7 @@ class BeautifulSoup(Tag): + from_encoding = from_encoding or deprecated_argument( + "fromEncoding", "from_encoding") + +- if from_encoding and isinstance(markup, unicode): ++ if from_encoding and isinstance(markup, str): + warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") + from_encoding = None + +@@ -234,7 +235,7 @@ class BeautifulSoup(Tag): + builder_class = builder + builder = None + elif builder is None: +- if isinstance(features, basestring): ++ if isinstance(features, str): + features = [features] + if features is None or len(features) == 0: + features = self.DEFAULT_BUILDER_FEATURES +@@ -309,13 +310,13 @@ class BeautifulSoup(Tag): + markup = markup.read() + elif len(markup) <= 256 and ( + (isinstance(markup, bytes) and not b'<' in markup) +- or (isinstance(markup, unicode) and not u'<' in markup) ++ or (isinstance(markup, str) and not '<' in markup) + ): + # Print out warnings for a couple beginner problems + # involving passing non-markup to Beautiful Soup. + # Beautiful Soup will still parse the input as markup, + # just in case that's what the user really wants. +- if (isinstance(markup, unicode) ++ if (isinstance(markup, str) + and not os.path.supports_unicode_filenames): + possible_filename = markup.encode("utf8") + else: +@@ -323,7 +324,7 @@ class BeautifulSoup(Tag): + is_file = False + try: + is_file = os.path.exists(possible_filename) +- except Exception, e: ++ except Exception as e: + # This is almost certainly a problem involving + # characters not valid in filenames on this + # system. Just let it go. +@@ -353,9 +354,9 @@ class BeautifulSoup(Tag): + pass + + if not success: +- other_exceptions = [unicode(e) for e in rejections] ++ other_exceptions = [str(e) for e in rejections] + raise ParserRejectedMarkup( +- u"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions) ++ "The markup you provided was rejected by the parser. 
Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions) + ) + + # Clear out the markup and remove the builder's circular +@@ -406,9 +407,9 @@ class BeautifulSoup(Tag): + if isinstance(markup, bytes): + space = b' ' + cant_start_with = (b"http:", b"https:") +- elif isinstance(markup, unicode): +- space = u' ' +- cant_start_with = (u"http:", u"https:") ++ elif isinstance(markup, str): ++ space = ' ' ++ cant_start_with = ("http:", "https:") + else: + return + +@@ -545,7 +546,7 @@ class BeautifulSoup(Tag): + containerClass = self.string_container(containerClass) + + if self.current_data: +- current_data = u''.join(self.current_data) ++ current_data = ''.join(self.current_data) + # If whitespace is not preserved, and this string contains + # nothing but ASCII spaces, replace it with a single space + # or newline. +@@ -748,9 +749,9 @@ class BeautifulSoup(Tag): + eventual_encoding = None + if eventual_encoding != None: + encoding_part = ' encoding="%s"' % eventual_encoding +- prefix = u'\n' % encoding_part ++ prefix = '\n' % encoding_part + else: +- prefix = u'' ++ prefix = '' + if not pretty_print: + indent_level = None + else: +@@ -788,4 +789,4 @@ class FeatureNotFound(ValueError): + if __name__ == '__main__': + import sys + soup = BeautifulSoup(sys.stdin) +- print(soup.prettify()) ++ print((soup.prettify())) +diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py +index 03da4c6..03fbd6a 100644 +--- a/bs4/builder/__init__.py ++++ b/bs4/builder/__init__.py +@@ -300,13 +300,13 @@ class TreeBuilder(object): + universal = self.cdata_list_attributes.get('*', []) + tag_specific = self.cdata_list_attributes.get( + tag_name.lower(), None) +- for attr in attrs.keys(): ++ for attr in list(attrs.keys()): + if attr in universal or (tag_specific and attr in tag_specific): + # We have a "class"-type attribute whose string + # value is a whitespace-separated list of + # values. Split it into a list. 
+ value = attrs[attr] +- if isinstance(value, basestring): ++ if isinstance(value, str): + values = nonwhitespace_re.findall(value) + else: + # html5lib sometimes calls setAttributes twice +@@ -496,7 +496,7 @@ class ParserRejectedMarkup(Exception): + """ + if isinstance(message_or_exception, Exception): + e = message_or_exception +- message_or_exception = "%s: %s" % (e.__class__.__name__, unicode(e)) ++ message_or_exception = "%s: %s" % (e.__class__.__name__, str(e)) + super(ParserRejectedMarkup, self).__init__(message_or_exception) + + # Builders are registered in reverse order of priority, so that custom +diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py +index a1c6134..69aefd7 100644 +--- a/bs4/builder/_html5lib.py ++++ b/bs4/builder/_html5lib.py +@@ -33,7 +33,7 @@ try: + # Pre-0.99999999 + from html5lib.treebuilders import _base as treebuilder_base + new_html5lib = False +-except ImportError, e: ++except ImportError as e: + # 0.99999999 and up + from html5lib.treebuilders import base as treebuilder_base + new_html5lib = True +@@ -79,7 +79,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): + parser = html5lib.HTMLParser(tree=self.create_treebuilder) + self.underlying_builder.parser = parser + extra_kwargs = dict() +- if not isinstance(markup, unicode): ++ if not isinstance(markup, str): + if new_html5lib: + extra_kwargs['override_encoding'] = self.user_specified_encoding + else: +@@ -87,13 +87,13 @@ class HTML5TreeBuilder(HTMLTreeBuilder): + doc = parser.parse(markup, **extra_kwargs) + + # Set the character encoding detected by the tokenizer. +- if isinstance(markup, unicode): ++ if isinstance(markup, str): + # We need to special-case this because html5lib sets + # charEncoding to UTF-8 if it gets Unicode input. + doc.original_encoding = None + else: + original_encoding = parser.tokenizer.stream.charEncoding[0] +- if not isinstance(original_encoding, basestring): ++ if not isinstance(original_encoding, str): + # In 0.99999999 and up, the encoding is an html5lib + # Encoding object. We want to use a string for compatibility + # with other tree builders. +@@ -110,7 +110,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" +- return u'%s' % fragment ++ return '%s' % fragment + + + class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): +@@ -217,7 +217,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): + rv.append("|%s<%s>" % (' ' * indent, name)) + if element.attrs: + attributes = [] +- for name, value in element.attrs.items(): ++ for name, value in list(element.attrs.items()): + if isinstance(name, NamespacedAttribute): + name = "%s %s" % (prefixes[name.namespace], name.name) + if isinstance(value, list): +@@ -272,7 +272,7 @@ class Element(treebuilder_base.Node): + + def appendChild(self, node): + string_child = child = None +- if isinstance(node, basestring): ++ if isinstance(node, str): + # Some other piece of code decided to pass in a string + # instead of creating a TextElement object to contain the + # string. 
+@@ -289,7 +289,7 @@ class Element(treebuilder_base.Node): + child = node.element + node.parent = self + +- if not isinstance(child, basestring) and child.parent is not None: ++ if not isinstance(child, str) and child.parent is not None: + node.element.extract() + + if (string_child is not None and self.element.contents +@@ -302,7 +302,7 @@ class Element(treebuilder_base.Node): + old_element.replace_with(new_element) + self.soup._most_recent_element = new_element + else: +- if isinstance(node, basestring): ++ if isinstance(node, str): + # Create a brand new NavigableString from this string. + child = self.soup.new_string(node) + +@@ -340,7 +340,7 @@ class Element(treebuilder_base.Node): + + self.soup.builder._replace_cdata_list_attribute_values( + self.name, attributes) +- for name, value in attributes.items(): ++ for name, value in list(attributes.items()): + self.element[name] = value + + # The attributes may contain variables that need substitution. +diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py +index 96a7b7d..88860a9 100644 +--- a/bs4/builder/_htmlparser.py ++++ b/bs4/builder/_htmlparser.py +@@ -8,11 +8,11 @@ __all__ = [ + 'HTMLParserTreeBuilder', + ] + +-from HTMLParser import HTMLParser ++from html.parser import HTMLParser + + try: +- from HTMLParser import HTMLParseError +-except ImportError, e: ++ from html.parser import HTMLParseError ++except ImportError as e: + # HTMLParseError is removed in Python 3.5. Since it can never be + # thrown in 3.5, we can just define our own class as a placeholder. + class HTMLParseError(Exception): +@@ -219,14 +219,14 @@ class BeautifulSoupHTMLParser(HTMLParser): + continue + try: + data = bytearray([real_name]).decode(encoding) +- except UnicodeDecodeError, e: ++ except UnicodeDecodeError as e: + pass + if not data: + try: +- data = unichr(real_name) +- except (ValueError, OverflowError), e: ++ data = chr(real_name) ++ except (ValueError, OverflowError) as e: + pass +- data = data or u"\N{REPLACEMENT CHARACTER}" ++ data = data or "\N{REPLACEMENT CHARACTER}" + self.handle_data(data) + + def handle_entityref(self, name): +@@ -353,7 +353,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): + document to Unicode and parsing it. Each strategy will be tried + in turn. + """ +- if isinstance(markup, unicode): ++ if isinstance(markup, str): + # Parse Unicode as-is. + yield (markup, None, None, False) + return +@@ -376,7 +376,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): + try: + parser.feed(markup) + parser.close() +- except HTMLParseError, e: ++ except HTMLParseError as e: + warnings.warn(RuntimeWarning( + "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) + raise e +diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py +index 1b44d75..432a2c8 100644 +--- a/bs4/builder/_lxml.py ++++ b/bs4/builder/_lxml.py +@@ -8,11 +8,11 @@ __all__ = [ + + try: + from collections.abc import Callable # Python 3.6 +-except ImportError , e: ++except ImportError as e: + from collections import Callable + + from io import BytesIO +-from StringIO import StringIO ++from io import StringIO + from lxml import etree + from bs4.element import ( + Comment, +@@ -35,7 +35,7 @@ LXML = 'lxml' + + def _invert(d): + "Invert a dictionary." 
+- return dict((v,k) for k, v in d.items()) ++ return dict((v,k) for k, v in list(d.items())) + + class LXMLTreeBuilderForXML(TreeBuilder): + DEFAULT_PARSER_CLASS = etree.XMLParser +@@ -81,7 +81,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): + + :param mapping: A dictionary mapping namespace prefixes to URIs. + """ +- for key, value in mapping.items(): ++ for key, value in list(mapping.items()): + if key and key not in self.soup._namespaces: + # Let the BeautifulSoup object know about a new namespace. + # If there are multiple namespaces defined with the same +@@ -169,12 +169,12 @@ class LXMLTreeBuilderForXML(TreeBuilder): + else: + self.processing_instruction_class = XMLProcessingInstruction + +- if isinstance(markup, unicode): ++ if isinstance(markup, str): + # We were given Unicode. Maybe lxml can parse Unicode on + # this system? + yield markup, None, document_declared_encoding, False + +- if isinstance(markup, unicode): ++ if isinstance(markup, str): + # No, apparently not. Convert the Unicode to UTF-8 and + # tell lxml to parse it as UTF-8. + yield (markup.encode("utf8"), "utf8", +@@ -189,7 +189,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): + def feed(self, markup): + if isinstance(markup, bytes): + markup = BytesIO(markup) +- elif isinstance(markup, unicode): ++ elif isinstance(markup, str): + markup = StringIO(markup) + + # Call feed() at least once, even if the markup is empty, +@@ -204,7 +204,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): + if len(data) != 0: + self.parser.feed(data) + self.parser.close() +- except (UnicodeDecodeError, LookupError, etree.ParserError), e: ++ except (UnicodeDecodeError, LookupError, etree.ParserError) as e: + raise ParserRejectedMarkup(e) + + def close(self): +@@ -233,7 +233,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): + # Also treat the namespace mapping as a set of attributes on the + # tag, so we can recreate it later. + attrs = attrs.copy() +- for prefix, namespace in nsmap.items(): ++ for prefix, namespace in list(nsmap.items()): + attribute = NamespacedAttribute( + "xmlns", prefix, "http://www.w3.org/2000/xmlns/") + attrs[attribute] = namespace +@@ -242,7 +242,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): + # from lxml with namespaces attached to their names, and + # turn then into NamespacedAttribute objects. + new_attrs = {} +- for attr, value in attrs.items(): ++ for attr, value in list(attrs.items()): + namespace, attr = self._getNsTag(attr) + if namespace is None: + new_attrs[attr] = value +@@ -302,7 +302,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" +- return u'\n%s' % fragment ++ return '\n%s' % fragment + + + class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): +@@ -323,10 +323,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): + self.parser = self.parser_for(encoding) + self.parser.feed(markup) + self.parser.close() +- except (UnicodeDecodeError, LookupError, etree.ParserError), e: ++ except (UnicodeDecodeError, LookupError, etree.ParserError) as e: + raise ParserRejectedMarkup(e) + + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" +- return u'%s' % fragment ++ return '%s' % fragment +diff --git a/bs4/dammit.py b/bs4/dammit.py +index 33f7b7d..ee3708f 100644 +--- a/bs4/dammit.py ++++ b/bs4/dammit.py +@@ -10,7 +10,7 @@ XML or HTML to reflect a new encoding; that's the tree builder's job. 
+ __license__ = "MIT" + + import codecs +-from htmlentitydefs import codepoint2name ++from html.entities import codepoint2name + import re + import logging + import string +@@ -22,7 +22,7 @@ try: + # PyPI package: cchardet + import cchardet + def chardet_dammit(s): +- if isinstance(s, unicode): ++ if isinstance(s, str): + return None + return cchardet.detect(s)['encoding'] + except ImportError: +@@ -32,7 +32,7 @@ except ImportError: + # PyPI package: chardet + import chardet + def chardet_dammit(s): +- if isinstance(s, unicode): ++ if isinstance(s, str): + return None + return chardet.detect(s)['encoding'] + #import chardet.constants +@@ -53,14 +53,14 @@ except ImportError: + + # Build bytestring and Unicode versions of regular expressions for finding + # a declared encoding inside an XML or HTML document. +-xml_encoding = u'^\\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>' +-html_meta = u'<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]' ++xml_encoding = '^\\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>' ++html_meta = '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]' + encoding_res = dict() + encoding_res[bytes] = { + 'html' : re.compile(html_meta.encode("ascii"), re.I), + 'xml' : re.compile(xml_encoding.encode("ascii"), re.I), + } +-encoding_res[unicode] = { ++encoding_res[str] = { + 'html' : re.compile(html_meta, re.I), + 'xml' : re.compile(xml_encoding, re.I) + } +@@ -80,7 +80,7 @@ class EntitySubstitution(object): + # entities, but that's a little tricky. + extra = [(39, 'apos')] + for codepoint, name in list(codepoint2name.items()) + extra: +- character = unichr(codepoint) ++ character = chr(codepoint) + if codepoint not in (34, 39): + # There's no point in turning the quotation mark into + # " or the single quote into ', unless it +@@ -323,7 +323,7 @@ class EncodingDetector: + :return: A 2-tuple (modified data, implied encoding) + """ + encoding = None +- if isinstance(data, unicode): ++ if isinstance(data, str): + # Unicode data cannot have a byte-order mark. + return data, encoding + if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ +@@ -370,7 +370,7 @@ class EncodingDetector: + if isinstance(markup, bytes): + res = encoding_res[bytes] + else: +- res = encoding_res[unicode] ++ res = encoding_res[str] + + xml_re = res['xml'] + html_re = res['html'] +@@ -431,9 +431,9 @@ class UnicodeDammit: + markup, override_encodings, is_html, exclude_encodings) + + # Short-circuit if the data is in Unicode to begin with. +- if isinstance(markup, unicode) or markup == '': ++ if isinstance(markup, str) or markup == '': + self.markup = markup +- self.unicode_markup = unicode(markup) ++ self.unicode_markup = str(markup) + self.original_encoding = None + return + +@@ -523,7 +523,7 @@ class UnicodeDammit: + + :param encoding: The name of an encoding. + """ +- return unicode(data, encoding, errors) ++ return str(data, encoding, errors) + + @property + def declared_html_encoding(self): +diff --git a/bs4/diagnose.py b/bs4/diagnose.py +index e4f2f47..500e92d 100644 +--- a/bs4/diagnose.py ++++ b/bs4/diagnose.py +@@ -4,8 +4,8 @@ + __license__ = "MIT" + + import cProfile +-from StringIO import StringIO +-from HTMLParser import HTMLParser ++from io import StringIO ++from html.parser import HTMLParser + import bs4 + from bs4 import BeautifulSoup, __version__ + from bs4.builder import builder_registry +@@ -25,8 +25,8 @@ def diagnose(data): + :param data: A string containing markup that needs to be explained. + :return: None; diagnostics are printed to standard output. 
+ """ +- print("Diagnostic running on Beautiful Soup %s" % __version__) +- print("Python version %s" % sys.version) ++ print(("Diagnostic running on Beautiful Soup %s" % __version__)) ++ print(("Python version %s" % sys.version)) + + basic_parsers = ["html.parser", "html5lib", "lxml"] + for name in basic_parsers: +@@ -35,16 +35,16 @@ def diagnose(data): + break + else: + basic_parsers.remove(name) +- print( ++ print(( + "I noticed that %s is not installed. Installing it may help." % +- name) ++ name)) + + if 'lxml' in basic_parsers: + basic_parsers.append("lxml-xml") + try: + from lxml import etree +- print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))) +- except ImportError, e: ++ print(("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))) ++ except ImportError as e: + print( + "lxml is not installed or couldn't be imported.") + +@@ -52,21 +52,21 @@ def diagnose(data): + if 'html5lib' in basic_parsers: + try: + import html5lib +- print("Found html5lib version %s" % html5lib.__version__) +- except ImportError, e: ++ print(("Found html5lib version %s" % html5lib.__version__)) ++ except ImportError as e: + print( + "html5lib is not installed or couldn't be imported.") + + if hasattr(data, 'read'): + data = data.read() + elif data.startswith("http:") or data.startswith("https:"): +- print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data) ++ print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)) + print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.") + return + else: + try: + if os.path.exists(data): +- print('"%s" looks like a filename. Reading data from the file.' % data) ++ print(('"%s" looks like a filename. Reading data from the file.' % data)) + with open(data) as fp: + data = fp.read() + except ValueError: +@@ -76,19 +76,19 @@ def diagnose(data): + print("") + + for parser in basic_parsers: +- print("Trying to parse your markup with %s" % parser) ++ print(("Trying to parse your markup with %s" % parser)) + success = False + try: + soup = BeautifulSoup(data, features=parser) + success = True +- except Exception, e: +- print("%s could not parse the markup." % parser) ++ except Exception as e: ++ print(("%s could not parse the markup." % parser)) + traceback.print_exc() + if success: +- print("Here's what %s did with the markup:" % parser) +- print(soup.prettify()) ++ print(("Here's what %s did with the markup:" % parser)) ++ print((soup.prettify())) + +- print("-" * 80) ++ print(("-" * 80)) + + def lxml_trace(data, html=True, **kwargs): + """Print out the lxml events that occur during parsing. +@@ -104,7 +104,7 @@ def lxml_trace(data, html=True, **kwargs): + """ + from lxml import etree + for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): +- print("%s, %4s, %s" % (event, element.tag, element.text)) ++ print(("%s, %4s, %s" % (event, element.tag, element.text))) + + class AnnouncingParser(HTMLParser): + """Subclass of HTMLParser that announces parse events, without doing +@@ -193,9 +193,9 @@ def rdoc(num_elements=1000): + + def benchmark_parsers(num_elements=100000): + """Very basic head-to-head performance benchmark.""" +- print("Comparative parser benchmark on Beautiful Soup %s" % __version__) ++ print(("Comparative parser benchmark on Beautiful Soup %s" % __version__)) + data = rdoc(num_elements) +- print("Generated a large invalid HTML document (%d bytes)." 
% len(data)) ++ print(("Generated a large invalid HTML document (%d bytes)." % len(data))) + + for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: + success = False +@@ -204,24 +204,24 @@ def benchmark_parsers(num_elements=100000): + soup = BeautifulSoup(data, parser) + b = time.time() + success = True +- except Exception, e: +- print("%s could not parse the markup." % parser) ++ except Exception as e: ++ print(("%s could not parse the markup." % parser)) + traceback.print_exc() + if success: +- print("BS4+%s parsed the markup in %.2fs." % (parser, b-a)) ++ print(("BS4+%s parsed the markup in %.2fs." % (parser, b-a))) + + from lxml import etree + a = time.time() + etree.HTML(data) + b = time.time() +- print("Raw lxml parsed the markup in %.2fs." % (b-a)) ++ print(("Raw lxml parsed the markup in %.2fs." % (b-a))) + + import html5lib + parser = html5lib.HTMLParser() + a = time.time() + parser.parse(data) + b = time.time() +- print("Raw html5lib parsed the markup in %.2fs." % (b-a)) ++ print(("Raw html5lib parsed the markup in %.2fs." % (b-a))) + + def profile(num_elements=100000, parser="lxml"): + """Use Python's profiler on a randomly generated document.""" +diff --git a/bs4/element.py b/bs4/element.py +index 09a81d9..81d9db9 100644 +--- a/bs4/element.py ++++ b/bs4/element.py +@@ -3,14 +3,14 @@ __license__ = "MIT" + + try: + from collections.abc import Callable # Python 3.6 +-except ImportError , e: ++except ImportError as e: + from collections import Callable + import re + import sys + import warnings + try: + import soupsieve +-except ImportError, e: ++except ImportError as e: + soupsieve = None + warnings.warn( + 'The soupsieve package is not installed. CSS selectors cannot be used.' +@@ -57,22 +57,22 @@ def _alias(attr): + # Source: + # https://docs.python.org/3/library/codecs.html#python-specific-encodings + PYTHON_SPECIFIC_ENCODINGS = set([ +- u"idna", +- u"mbcs", +- u"oem", +- u"palmos", +- u"punycode", +- u"raw_unicode_escape", +- u"undefined", +- u"unicode_escape", +- u"raw-unicode-escape", +- u"unicode-escape", +- u"string-escape", +- u"string_escape", ++ "idna", ++ "mbcs", ++ "oem", ++ "palmos", ++ "punycode", ++ "raw_unicode_escape", ++ "undefined", ++ "unicode_escape", ++ "raw-unicode-escape", ++ "unicode-escape", ++ "string-escape", ++ "string_escape", + ]) + + +-class NamespacedAttribute(unicode): ++class NamespacedAttribute(str): + """A namespaced string (e.g. 'xml:lang') that remembers the namespace + ('xml') and the name ('lang') that were used to create it. + """ +@@ -84,18 +84,18 @@ class NamespacedAttribute(unicode): + name = None + + if name is None: +- obj = unicode.__new__(cls, prefix) ++ obj = str.__new__(cls, prefix) + elif prefix is None: + # Not really namespaced. 
+- obj = unicode.__new__(cls, name) ++ obj = str.__new__(cls, name) + else: +- obj = unicode.__new__(cls, prefix + ":" + name) ++ obj = str.__new__(cls, prefix + ":" + name) + obj.prefix = prefix + obj.name = name + obj.namespace = namespace + return obj + +-class AttributeValueWithCharsetSubstitution(unicode): ++class AttributeValueWithCharsetSubstitution(str): + """A stand-in object for a character encoding specified in HTML.""" + + class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): +@@ -106,7 +106,7 @@ class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): + """ + + def __new__(cls, original_value): +- obj = unicode.__new__(cls, original_value) ++ obj = str.__new__(cls, original_value) + obj.original_value = original_value + return obj + +@@ -134,9 +134,9 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): + match = cls.CHARSET_RE.search(original_value) + if match is None: + # No substitution necessary. +- return unicode.__new__(unicode, original_value) ++ return str.__new__(str, original_value) + +- obj = unicode.__new__(cls, original_value) ++ obj = str.__new__(cls, original_value) + obj.original_value = original_value + return obj + +@@ -376,7 +376,7 @@ class PageElement(object): + raise ValueError("Cannot insert None into a tag.") + if new_child is self: + raise ValueError("Cannot insert a tag into itself.") +- if (isinstance(new_child, basestring) ++ if (isinstance(new_child, str) + and not isinstance(new_child, NavigableString)): + new_child = NavigableString(new_child) + +@@ -753,7 +753,7 @@ class PageElement(object): + result = (element for element in generator + if isinstance(element, Tag)) + return ResultSet(strainer, result) +- elif isinstance(name, basestring): ++ elif isinstance(name, str): + # Optimization to find all tags with a given name. + if name.count(':') == 1: + # This is a name with a prefix. If this is a namespace-aware document, +@@ -872,7 +872,7 @@ class PageElement(object): + return self.parents + + +-class NavigableString(unicode, PageElement): ++class NavigableString(str, PageElement): + """A Python Unicode string that is part of a parse tree. + + When Beautiful Soup parses the markup penguin, it will +@@ -895,10 +895,10 @@ class NavigableString(unicode, PageElement): + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ +- if isinstance(value, unicode): +- u = unicode.__new__(cls, value) ++ if isinstance(value, str): ++ u = str.__new__(cls, value) + else: +- u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) ++ u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + u.setup() + return u + +@@ -909,7 +909,7 @@ class NavigableString(unicode, PageElement): + return type(self)(self) + + def __getnewargs__(self): +- return (unicode(self),) ++ return (str(self),) + + def __getattr__(self, attr): + """text.string gives you text. 
This is for backwards +@@ -975,30 +975,30 @@ class PreformattedString(NavigableString): + + class CData(PreformattedString): + """A CDATA block.""" +- PREFIX = u'' ++ PREFIX = '' + + class ProcessingInstruction(PreformattedString): + """A SGML processing instruction.""" + +- PREFIX = u'' ++ PREFIX = '' + + class XMLProcessingInstruction(ProcessingInstruction): + """An XML processing instruction.""" +- PREFIX = u'' ++ PREFIX = '' + + class Comment(PreformattedString): + """An HTML or XML comment.""" +- PREFIX = u'' ++ PREFIX = '' + + + class Declaration(PreformattedString): + """An XML declaration.""" +- PREFIX = u'' ++ PREFIX = '' + + + class Doctype(PreformattedString): +@@ -1026,8 +1026,8 @@ class Doctype(PreformattedString): + + return Doctype(value) + +- PREFIX = u'\n' ++ PREFIX = '\n' + + + class Stylesheet(NavigableString): +@@ -1263,7 +1263,7 @@ class Tag(PageElement): + for string in self._all_strings(True): + yield string + +- def get_text(self, separator=u"", strip=False, ++ def get_text(self, separator="", strip=False, + types=(NavigableString, CData)): + """Get all child strings, concatenated using the given separator. + +@@ -1416,7 +1416,7 @@ class Tag(PageElement): + def __contains__(self, x): + return x in self.contents + +- def __nonzero__(self): ++ def __bool__(self): + "A tag is non-None even if it has no contents." + return True + +@@ -1565,8 +1565,8 @@ class Tag(PageElement): + else: + if isinstance(val, list) or isinstance(val, tuple): + val = ' '.join(val) +- elif not isinstance(val, basestring): +- val = unicode(val) ++ elif not isinstance(val, str): ++ val = str(val) + elif ( + isinstance(val, AttributeValueWithCharsetSubstitution) + and eventual_encoding is not None +@@ -1575,7 +1575,7 @@ class Tag(PageElement): + + text = formatter.attribute_value(val) + decoded = ( +- unicode(key) + '=' ++ str(key) + '=' + + formatter.quoted_attribute_value(text)) + attrs.append(decoded) + close = '' +@@ -1934,7 +1934,7 @@ class SoupStrainer(object): + else: + attrs = kwargs + normalized_attrs = {} +- for key, value in attrs.items(): ++ for key, value in list(attrs.items()): + normalized_attrs[key] = self._normalize_search_value(value) + + self.attrs = normalized_attrs +@@ -1943,7 +1943,7 @@ class SoupStrainer(object): + def _normalize_search_value(self, value): + # Leave it alone if it's a Unicode string, a callable, a + # regular expression, a boolean, or None. +- if (isinstance(value, unicode) or isinstance(value, Callable) or hasattr(value, 'match') ++ if (isinstance(value, str) or isinstance(value, Callable) or hasattr(value, 'match') + or isinstance(value, bool) or value is None): + return value + +@@ -1956,7 +1956,7 @@ class SoupStrainer(object): + new_value = [] + for v in value: + if (hasattr(v, '__iter__') and not isinstance(v, bytes) +- and not isinstance(v, unicode)): ++ and not isinstance(v, str)): + # This is almost certainly the user's mistake. In the + # interests of avoiding infinite loops, we'll let + # it through as-is rather than doing a recursive call. +@@ -1968,7 +1968,7 @@ class SoupStrainer(object): + # Otherwise, convert it into a Unicode string. + # The unicode(str()) thing is so this will do the same thing on Python 2 + # and Python 3. 
+- return unicode(str(value)) ++ return str(str(value)) + + def __str__(self): + """A human-readable representation of this SoupStrainer.""" +@@ -1996,7 +1996,7 @@ class SoupStrainer(object): + markup = markup_name + markup_attrs = markup + +- if isinstance(self.name, basestring): ++ if isinstance(self.name, str): + # Optimization for a very common case where the user is + # searching for a tag with one specific name, and we're + # looking at a tag with a different name. +@@ -2052,7 +2052,7 @@ class SoupStrainer(object): + found = None + # If given a list of items, scan it for a text element that + # matches. +- if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)): ++ if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): +@@ -2065,7 +2065,7 @@ class SoupStrainer(object): + found = self.search_tag(markup) + # If it's text, make sure the text matches. + elif isinstance(markup, NavigableString) or \ +- isinstance(markup, basestring): ++ isinstance(markup, str): + if not self.name and not self.attrs and self._matches(markup, self.text): + found = markup + else: +@@ -2110,7 +2110,7 @@ class SoupStrainer(object): + return not match_against + + if (hasattr(match_against, '__iter__') +- and not isinstance(match_against, basestring)): ++ and not isinstance(match_against, str)): + # We're asked to match against an iterable of items. + # The markup must be match at least one item in the + # iterable. We'll try each one in turn. +@@ -2137,7 +2137,7 @@ class SoupStrainer(object): + # the tag's name and once against its prefixed name. + match = False + +- if not match and isinstance(match_against, unicode): ++ if not match and isinstance(match_against, str): + # Exact string match + match = markup == match_against + +diff --git a/bs4/formatter.py b/bs4/formatter.py +index 9a692ec..2cbab4c 100644 +--- a/bs4/formatter.py ++++ b/bs4/formatter.py +@@ -83,7 +83,7 @@ class Formatter(EntitySubstitution): + """ + if not self.entity_substitution: + return ns +- from element import NavigableString ++ from .element import NavigableString + if (isinstance(ns, NavigableString) + and ns.parent is not None + and ns.parent.name in self.cdata_containing_tags): +diff --git a/bs4/testing.py b/bs4/testing.py +index a2f83a1..9ca507b 100644 +--- a/bs4/testing.py ++++ b/bs4/testing.py +@@ -25,7 +25,7 @@ from bs4.element import ( + from bs4.builder import HTMLParserTreeBuilder + default_builder = HTMLParserTreeBuilder + +-BAD_DOCUMENT = u"""A bare string ++BAD_DOCUMENT = """A bare string + + +
+@@ -94,7 +94,7 @@ class SoupTest(unittest.TestCase): + # Verify that every tag that was opened was eventually closed. + + # There are no tags in the open tag counter. +- assert all(v==0 for v in obj.open_tag_counter.values()) ++ assert all(v==0 for v in list(obj.open_tag_counter.values())) + + # The only tag in the tag stack is the one for the root + # document. +@@ -372,7 +372,7 @@ class HTMLTreeBuilderSmokeTest(object): + # process_markup correctly sets processing_instruction_class + # even when the markup is already Unicode and there is no + # need to process anything. +- markup = u"""""" ++ markup = """""" + soup = self.soup(markup) + self.assertEqual(markup, soup.decode()) + +@@ -544,14 +544,14 @@ Hello, world! + # "&T" and "&p" look like incomplete character entities, but they are + # not. + self.assertSoupEquals( +- u"

<p>&bull; AT&T is in the s&p 500</p>",
+- u"<p>\u2022 AT&T is in the s&p 500</p>"
++ "<p>&bull; AT&T is in the s&p 500</p>",
++ "<p>\u2022 AT&T is in the s&p 500</p>
" + ) + + def test_apos_entity(self): + self.assertSoupEquals( +- u"

<p>Bob&apos;s Bar</p>",
+- u"<p>Bob's Bar</p>",
++ "<p>Bob&apos;s Bar</p>",
++ "<p>Bob's Bar</p>
", + ) + + def test_entities_in_foreign_document_encoding(self): +@@ -564,17 +564,17 @@ Hello, world! + # characters. + markup = "

<p>&#147;Hello&#148; &#45;&#9731;</p>
" + soup = self.soup(markup) +- self.assertEquals(u"“Hello” -☃", soup.p.string) ++ self.assertEqual("“Hello” -☃", soup.p.string) + + def test_entities_in_attributes_converted_to_unicode(self): +- expect = u'

<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
++ expect = '<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
+ self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
+ self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
+ self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
+ self.assertSoupEquals('<p id="pi&ntilde;ata"></p>
', expect) + + def test_entities_in_text_converted_to_unicode(self): +- expect = u'

<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
++ expect = '<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
+ self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
+ self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
+ self.assertSoupEquals("<p>pi&ntilde;ata</p>", expect)
+@@ -585,7 +585,7 @@ Hello, world!
+ '<p>I said "good day!"</p>')
+
+ def test_out_of_range_entity(self):
+- expect = u"\N{REPLACEMENT CHARACTER}"
++ expect = "\N{REPLACEMENT CHARACTER}"
+ self.assertSoupEquals("&#10000000000000;", expect)
+ self.assertSoupEquals("&#x10000000000000;", expect)
+ self.assertSoupEquals("&#1000000000;", expect)
+@@ -663,9 +663,9 @@ Hello, world!
+ # A seemingly innocuous document... but it's in Unicode! And
+ # it contains characters that can't be represented in the
+ # encoding found in the declaration! The horror!
+- markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body></html>'
++ markup = '<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body></html>'
+ soup = self.soup(markup)
+- self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
++ self.assertEqual('Sacr\xe9 bleu!', soup.body.string)
+
+ def test_soupstrainer(self):
+ """Parsers should be able to work with SoupStrainers."""
+@@ -705,7 +705,7 @@ Hello, world!
+ # Both XML and HTML entities are converted to Unicode characters
+ # during parsing.
+ text = "<p>&lt;&lt;sacr&eacute; bleu!&gt;&gt;</p>"
+- expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
++ expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
+ self.assertSoupEquals(text, expected)
+
+ def test_smart_quotes_converted_on_the_way_in(self):
+@@ -715,15 +715,15 @@ Hello, world!
+ soup = self.soup(quote)
+ self.assertEqual(
+ soup.p.string,
+- u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
++ "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
+
+ def test_non_breaking_spaces_converted_on_the_way_in(self):
+ soup = self.soup("<a>&nbsp;&nbsp;</a>")
+- self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
++ self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2)
+
+ def test_entities_converted_on_the_way_out(self):
+ text = "<p>&lt;&lt;sacr&eacute; bleu!&gt;&gt;</p>"
+- expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
++ expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>
".encode("utf-8") + soup = self.soup(text) + self.assertEqual(soup.p.encode("utf-8"), expected) + +@@ -732,7 +732,7 @@ Hello, world! + # easy-to-understand document. + + # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. +- unicode_html = u'

<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
++ unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
+
+ # That's because we're going to encode it into ISO-Latin-1, and use
+ # that to test.
+@@ -848,8 +848,8 @@ Hello, world!
+ soup = self.soup(markup)
+ for encoding in PYTHON_SPECIFIC_ENCODINGS:
+ if encoding in (
+- u'idna', u'mbcs', u'oem', u'undefined',
+- u'string_escape', u'string-escape'
++ 'idna', 'mbcs', 'oem', 'undefined',
++ 'string_escape', 'string-escape'
+ ):
+ # For one reason or another, these will raise an
+ # exception if we actually try to use them, so don't
+@@ -910,8 +910,8 @@ class XMLTreeBuilderSmokeTest(object):
+ soup = self.soup(markup)
+ for encoding in PYTHON_SPECIFIC_ENCODINGS:
+ if encoding in (
+- u'idna', u'mbcs', u'oem', u'undefined',
+- u'string_escape', u'string-escape'
++ 'idna', 'mbcs', 'oem', 'undefined',
++ 'string_escape', 'string-escape'
+ ):
+ # For one reason or another, these will raise an
+ # exception if we actually try to use them, so don't
+@@ -962,15 +962,15 @@ class XMLTreeBuilderSmokeTest(object):
+ self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)
+
+ def test_can_parse_unicode_document(self):
+- markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
++ markup = '<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
+ soup = self.soup(markup)
+- self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
++ self.assertEqual('Sacr\xe9 bleu!', soup.root.string)
+
+ def test_popping_namespaced_tag(self):
+ markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:description>c</dc:description>d</rss>'
+ soup = self.soup(markup)
+ self.assertEqual(
+- unicode(soup.rss), markup)
++ str(soup.rss), markup)
+
+ def test_docstring_includes_correct_encoding(self):
+ soup = self.soup("<root/>")
+@@ -1001,17 +1001,17 @@ class XMLTreeBuilderSmokeTest(object):
+ def test_closing_namespaced_tag(self):
+ markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
+ soup = self.soup(markup)
+- self.assertEqual(unicode(soup.p), markup)
++ self.assertEqual(str(soup.p), markup)
+
+ def test_namespaced_attributes(self):
+ markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
+ soup = self.soup(markup)
+- self.assertEqual(unicode(soup.foo), markup)
++ self.assertEqual(str(soup.foo), markup)
+
+ def test_namespaced_attributes_xml_namespace(self):
+ markup = '<foo xml:lang="fr">bar</foo>'
+ soup = self.soup(markup)
+- self.assertEqual(unicode(soup.foo), markup)
++ self.assertEqual(str(soup.foo), markup)
+
+ def test_find_by_prefixed_name(self):
+ doc = """
+diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
+index 7b0a6d4..b77659b 100644
+--- a/bs4/tests/test_html5lib.py
++++ b/bs4/tests/test_html5lib.py
+@@ -5,7 +5,7 @@ import warnings
+ try:
+ from bs4.builder import HTML5TreeBuilder
+ HTML5LIB_PRESENT = True
+-except ImportError, e:
++except ImportError as e:
+ HTML5LIB_PRESENT = False
+ from bs4.element import SoupStrainer
+ from bs4.testing import (
+@@ -74,14 +74,14 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
+ def test_reparented_markup(self):
+ markup = '

<p><em>foo</p>\n<p>bar<a></a></em></p>'
+ soup = self.soup(markup)
+- self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
++ self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
+ self.assertEqual(2, len(soup.find_all('p')))
+
+
+ def test_reparented_markup_ends_with_whitespace(self):
+ markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
+ soup = self.soup(markup)
+- self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
++ self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
+ self.assertEqual(2, len(soup.find_all('p')))
+
+ def test_reparented_markup_containing_identical_whitespace_nodes(self):
+@@ -127,7 +127,7 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
+ def test_foster_parenting(self):
+ markup = b"""<table><td></tbody>A"""
+ soup = self.soup(markup)
+- self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
++ self.assertEqual("<body>A<table><tbody><tr><td></td></tr></tbody></table></body>
", soup.body.decode()) + + def test_extraction(self): + """ +diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py +index 7ee91aa..aeff094 100644 +--- a/bs4/tests/test_htmlparser.py ++++ b/bs4/tests/test_htmlparser.py +@@ -60,20 +60,20 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): + # If you don't provide any particular value for + # on_duplicate_attribute, later values replace earlier values. + soup = self.soup(markup) +- self.assertEquals("url3", soup.a['href']) +- self.assertEquals(["cls"], soup.a['class']) +- self.assertEquals("id", soup.a['id']) ++ self.assertEqual("url3", soup.a['href']) ++ self.assertEqual(["cls"], soup.a['class']) ++ self.assertEqual("id", soup.a['id']) + + # You can also get this behavior explicitly. + def assert_attribute(on_duplicate_attribute, expected): + soup = self.soup( + markup, on_duplicate_attribute=on_duplicate_attribute + ) +- self.assertEquals(expected, soup.a['href']) ++ self.assertEqual(expected, soup.a['href']) + + # Verify that non-duplicate attributes are treated normally. +- self.assertEquals(["cls"], soup.a['class']) +- self.assertEquals("id", soup.a['id']) ++ self.assertEqual(["cls"], soup.a['class']) ++ self.assertEqual("id", soup.a['id']) + assert_attribute(None, "url3") + assert_attribute(BeautifulSoupHTMLParser.REPLACE, "url3") + +diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py +index f96e4ae..3d0c75f 100644 +--- a/bs4/tests/test_lxml.py ++++ b/bs4/tests/test_lxml.py +@@ -7,7 +7,7 @@ try: + import lxml.etree + LXML_PRESENT = True + LXML_VERSION = lxml.etree.LXML_VERSION +-except ImportError, e: ++except ImportError as e: + LXML_PRESENT = False + LXML_VERSION = (0,) + +@@ -68,7 +68,7 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): + # if one is installed. + with warnings.catch_warnings(record=True) as w: + soup = BeautifulStoneSoup("") +- self.assertEqual(u"", unicode(soup.b)) ++ self.assertEqual("", str(soup.b)) + self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message)) + + def test_tracking_line_numbers(self): +diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py +index 857eb41..e1035ea 100644 +--- a/bs4/tests/test_soup.py ++++ b/bs4/tests/test_soup.py +@@ -51,17 +51,17 @@ PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2)) + class TestConstructor(SoupTest): + + def test_short_unicode_input(self): +- data = u"

<h1>éé</h1>"
++ data = "<h1>éé</h1>"
+ soup = self.soup(data)
+- self.assertEqual(u"éé", soup.h1.string)
++ self.assertEqual("éé", soup.h1.string)
+
+ def test_embedded_null(self):
+- data = u"<h1>foo\0bar</h1>"
++ data = "<h1>foo\0bar</h1>
" + soup = self.soup(data) +- self.assertEqual(u"foo\0bar", soup.h1.string) ++ self.assertEqual("foo\0bar", soup.h1.string) + + def test_exclude_encodings(self): +- utf8_data = u"Räksmörgås".encode("utf-8") ++ utf8_data = "Räksmörgås".encode("utf-8") + soup = self.soup(utf8_data, exclude_encodings=["utf-8"]) + self.assertEqual("windows-1252", soup.original_encoding) + +@@ -127,7 +127,7 @@ class TestConstructor(SoupTest): + yield markup, None, None, False + + import re +- self.assertRaisesRegexp( ++ self.assertRaisesRegex( + ParserRejectedMarkup, + "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.", + BeautifulSoup, '', builder=Mock, +@@ -303,7 +303,7 @@ class TestWarnings(SoupTest): + with warnings.catch_warnings(record=True) as warning_list: + # note - this url must differ from the bytes one otherwise + # python's warnings system swallows the second warning +- soup = self.soup(u"http://www.crummyunicode.com/") ++ soup = self.soup("http://www.crummyunicode.com/") + warning = self._assert_warning( + warning_list, MarkupResemblesLocatorWarning + ) +@@ -319,7 +319,7 @@ class TestWarnings(SoupTest): + + def test_url_warning_with_unicode_and_space(self): + with warnings.catch_warnings(record=True) as warning_list: +- soup = self.soup(u"http://www.crummyuncode.com/ is great") ++ soup = self.soup("http://www.crummyuncode.com/ is great") + self.assertFalse(any("looks like a URL" in str(w.message) + for w in warning_list)) + +@@ -341,9 +341,9 @@ class TestEntitySubstitution(unittest.TestCase): + def test_simple_html_substitution(self): + # Unicode characters corresponding to named HTML entites + # are substituted, and no others. +- s = u"foo\u2200\N{SNOWMAN}\u00f5bar" ++ s = "foo\u2200\N{SNOWMAN}\u00f5bar" + self.assertEqual(self.sub.substitute_html(s), +- u"foo∀\N{SNOWMAN}õbar") ++ "foo∀\N{SNOWMAN}õbar") + + def test_smart_quote_substitution(self): + # MS smart quotes are a common source of frustration, so we +@@ -408,7 +408,7 @@ class TestEncodingConversion(SoupTest): + + def setUp(self): + super(TestEncodingConversion, self).setUp() +- self.unicode_data = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' ++ self.unicode_data = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' + self.utf8_data = self.unicode_data.encode("utf-8") + # Just so you know what it looks like. + self.assertEqual( +@@ -428,7 +428,7 @@ class TestEncodingConversion(SoupTest): + ascii = b"a" + soup_from_ascii = self.soup(ascii) + unicode_output = soup_from_ascii.decode() +- self.assertTrue(isinstance(unicode_output, unicode)) ++ self.assertTrue(isinstance(unicode_output, str)) + self.assertEqual(unicode_output, self.document_for(ascii.decode())) + self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8") + finally: +@@ -440,7 +440,7 @@ class TestEncodingConversion(SoupTest): + # is not set. + soup_from_unicode = self.soup(self.unicode_data) + self.assertEqual(soup_from_unicode.decode(), self.unicode_data) +- self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!') ++ self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!') + self.assertEqual(soup_from_unicode.original_encoding, None) + + def test_utf8_in_unicode_out(self): +@@ -448,7 +448,7 @@ class TestEncodingConversion(SoupTest): + # attribute is set. 
+ soup_from_utf8 = self.soup(self.utf8_data) + self.assertEqual(soup_from_utf8.decode(), self.unicode_data) +- self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!') ++ self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!') + + def test_utf8_out(self): + # The internal data structures can be encoded as UTF-8. +@@ -459,7 +459,7 @@ class TestEncodingConversion(SoupTest): + PYTHON_3_PRE_3_2, + "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.") + def test_attribute_name_containing_unicode_characters(self): +- markup = u'
<div><a \N{CYRILLIC SMALL LETTER A}\N{CYRILLIC SMALL LETTER BE}\N{CYRILLIC SMALL LETTER VE}="value"/></div>'
++ markup = '<div><a \N{CYRILLIC SMALL LETTER A}\N{CYRILLIC SMALL LETTER BE}\N{CYRILLIC SMALL LETTER VE}="value"/></div>
' + self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8")) + + class TestUnicodeDammit(unittest.TestCase): +@@ -526,7 +526,7 @@ class TestUnicodeDammit(unittest.TestCase): + + def test_exclude_encodings(self): + # This is UTF-8. +- utf8_data = u"Räksmörgås".encode("utf-8") ++ utf8_data = "Räksmörgås".encode("utf-8") + + # But if we exclude UTF-8 from consideration, the guess is + # Windows-1252. +diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py +index 2246346..b4f2a86 100644 +--- a/bs4/tests/test_tree.py ++++ b/bs4/tests/test_tree.py +@@ -75,13 +75,13 @@ class TestFind(TreeTest): + self.assertEqual(soup.find("b").string, "2") + + def test_unicode_text_find(self): +- soup = self.soup(u'

<h1>Räksmörgås</h1>')
+- self.assertEqual(soup.find(string=u'Räksmörgås'), u'Räksmörgås')
++ soup = self.soup('<h1>Räksmörgås</h1>')
++ self.assertEqual(soup.find(string='Räksmörgås'), 'Räksmörgås')
+
+ def test_unicode_attribute_find(self):
+- soup = self.soup(u'<h1 id="Räksmörgås">here it is</h1>')
++ soup = self.soup('<h1 id="Räksmörgås">here it is</h1>
') + str(soup) +- self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text) ++ self.assertEqual("here it is", soup.find(id='Räksmörgås').text) + + + def test_find_everything(self): +@@ -101,17 +101,17 @@ class TestFindAll(TreeTest): + """You can search the tree for text nodes.""" + soup = self.soup("Foobar\xbb") + # Exact match. +- self.assertEqual(soup.find_all(string="bar"), [u"bar"]) +- self.assertEqual(soup.find_all(text="bar"), [u"bar"]) ++ self.assertEqual(soup.find_all(string="bar"), ["bar"]) ++ self.assertEqual(soup.find_all(text="bar"), ["bar"]) + # Match any of a number of strings. + self.assertEqual( +- soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"]) ++ soup.find_all(text=["Foo", "bar"]), ["Foo", "bar"]) + # Match a regular expression. + self.assertEqual(soup.find_all(text=re.compile('.*')), +- [u"Foo", u"bar", u'\xbb']) ++ ["Foo", "bar", '\xbb']) + # Match anything. + self.assertEqual(soup.find_all(text=True), +- [u"Foo", u"bar", u'\xbb']) ++ ["Foo", "bar", '\xbb']) + + def test_find_all_limit(self): + """You can limit the number of items returned by find_all.""" +@@ -254,8 +254,8 @@ class TestFindAllByAttribute(TreeTest): + ["Matching a.", "Matching b."]) + + def test_find_all_by_utf8_attribute_value(self): +- peace = u"םולש".encode("utf8") +- data = u''.encode("utf8") ++ peace = "םולש".encode("utf8") ++ data = ''.encode("utf8") + soup = self.soup(data) + self.assertEqual([soup.a], soup.find_all(title=peace)) + self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8"))) +@@ -444,7 +444,7 @@ class TestSmooth(TreeTest): + # output. + + # Since the tag has two children, its .string is None. +- self.assertEquals(None, div.span.string) ++ self.assertEqual(None, div.span.string) + + self.assertEqual(7, len(div.contents)) + div.smooth() +@@ -755,18 +755,18 @@ class TestTag(SoupTest): + + # No list of whitespace-preserving tags -> pretty-print + tag._preserve_whitespace_tags = None +- self.assertEquals(True, tag._should_pretty_print(0)) ++ self.assertEqual(True, tag._should_pretty_print(0)) + + # List exists but tag is not on the list -> pretty-print + tag.preserve_whitespace_tags = ["some_other_tag"] +- self.assertEquals(True, tag._should_pretty_print(1)) ++ self.assertEqual(True, tag._should_pretty_print(1)) + + # Indent level is None -> don't pretty-print +- self.assertEquals(False, tag._should_pretty_print(None)) ++ self.assertEqual(False, tag._should_pretty_print(None)) + + # Tag is on the whitespace-preserving list -> don't pretty-print + tag.preserve_whitespace_tags = ["some_other_tag", "a_tag"] +- self.assertEquals(False, tag._should_pretty_print(1)) ++ self.assertEqual(False, tag._should_pretty_print(1)) + + + class TestTagCreation(SoupTest): +@@ -905,10 +905,10 @@ class TestTreeModification(SoupTest): + assert not isinstance(i, BeautifulSoup) + + p1, p2, p3, p4 = list(soup.children) +- self.assertEquals("And now, a word:", p1.string) +- self.assertEquals("p2", p2.string) +- self.assertEquals("p3", p3.string) +- self.assertEquals("And we're back.", p4.string) ++ self.assertEqual("And now, a word:", p1.string) ++ self.assertEqual("p2", p2.string) ++ self.assertEqual("p3", p3.string) ++ self.assertEqual("And we're back.", p4.string) + + + def test_replace_with_maintains_next_element_throughout(self): +@@ -1015,8 +1015,8 @@ class TestTreeModification(SoupTest): + d1 = soup.find('div', id='d1') + d2 = soup.find('div', id='d2') + d2.extend(d1) +- self.assertEqual(u'
<div id="d1"></div>', d1.decode())
+- self.assertEqual(u'<div id="d2"><a>1</a><a>2</a><a>3</a><a>4</a></div>', d2.decode())
++ self.assertEqual('<div id="d1"></div>
', d1.decode()) ++ self.assertEqual('', d2.decode()) + + def test_move_tag_to_beginning_of_parent(self): + data = "" +@@ -1262,7 +1262,7 @@ class TestTreeModification(SoupTest): + + """) + [soup.script.extract() for i in soup.find_all("script")] +- self.assertEqual("\n\n\n", unicode(soup.body)) ++ self.assertEqual("\n\n\n", str(soup.body)) + + + def test_extract_works_when_element_is_surrounded_by_identical_strings(self): +@@ -1524,7 +1524,7 @@ class TestPersistence(SoupTest): + soup = BeautifulSoup(b'
<p>&nbsp;</p>', 'html.parser')
+ encoding = soup.original_encoding
+ copy = soup.__copy__()
+- self.assertEqual(u"<p> </p>", unicode(copy))
++ self.assertEqual("<p> </p>
", str(copy)) + self.assertEqual(encoding, copy.original_encoding) + + def test_copy_preserves_builder_information(self): +@@ -1554,14 +1554,14 @@ class TestPersistence(SoupTest): + + def test_unicode_pickle(self): + # A tree containing Unicode characters can be pickled. +- html = u"\N{SNOWMAN}" ++ html = "\N{SNOWMAN}" + soup = self.soup(html) + dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) + loaded = pickle.loads(dumped) + self.assertEqual(loaded.decode(), soup.decode()) + + def test_copy_navigablestring_is_not_attached_to_tree(self): +- html = u"FooBar" ++ html = "FooBar" + soup = self.soup(html) + s1 = soup.find(string="Foo") + s2 = copy.copy(s1) +@@ -1573,7 +1573,7 @@ class TestPersistence(SoupTest): + self.assertEqual(None, s2.previous_element) + + def test_copy_navigablestring_subclass_has_same_type(self): +- html = u"" ++ html = "" + soup = self.soup(html) + s1 = soup.string + s2 = copy.copy(s1) +@@ -1581,19 +1581,19 @@ class TestPersistence(SoupTest): + self.assertTrue(isinstance(s2, Comment)) + + def test_copy_entire_soup(self): +- html = u"
FooBar
end" ++ html = "
FooBar
end" + soup = self.soup(html) + soup_copy = copy.copy(soup) + self.assertEqual(soup, soup_copy) + + def test_copy_tag_copies_contents(self): +- html = u"
FooBar
end" ++ html = "
FooBar
end" + soup = self.soup(html) + div = soup.div + div_copy = copy.copy(div) + + # The two tags look the same, and evaluate to equal. +- self.assertEqual(unicode(div), unicode(div_copy)) ++ self.assertEqual(str(div), str(div_copy)) + self.assertEqual(div, div_copy) + + # But they're not the same object. +@@ -1609,17 +1609,17 @@ class TestPersistence(SoupTest): + class TestSubstitutions(SoupTest): + + def test_default_formatter_is_minimal(self): +- markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" ++ markup = "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" + soup = self.soup(markup) + decoded = soup.decode(formatter="minimal") + # The < is converted back into < but the e-with-acute is left alone. + self.assertEqual( + decoded, + self.document_for( +- u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) ++ "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) + + def test_formatter_html(self): +- markup = u"
<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" ++ markup = "
<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" + soup = self.soup(markup) + decoded = soup.decode(formatter="html") + self.assertEqual( +@@ -1627,7 +1627,7 @@ class TestSubstitutions(SoupTest): + self.document_for("
<<Sacré bleu!>>")) + + def test_formatter_html5(self): +- markup = u"
<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" ++ markup = "
<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" + soup = self.soup(markup) + decoded = soup.decode(formatter="html5") + self.assertEqual( +@@ -1635,49 +1635,49 @@ class TestSubstitutions(SoupTest): + self.document_for("
<<Sacré bleu!>>")) + + def test_formatter_minimal(self): +- markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" ++ markup = "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" + soup = self.soup(markup) + decoded = soup.decode(formatter="minimal") + # The < is converted back into < but the e-with-acute is left alone. + self.assertEqual( + decoded, + self.document_for( +- u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) ++ "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) + + def test_formatter_null(self): +- markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" ++ markup = "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" + soup = self.soup(markup) + decoded = soup.decode(formatter=None) + # Neither the angle brackets nor the e-with-acute are converted. + # This is not valid HTML, but it's what the user wanted. + self.assertEqual(decoded, +- self.document_for(u"<>")) ++ self.document_for("<>")) + + def test_formatter_custom(self): +- markup = u"<foo>bar
" ++ markup = "<foo>bar
" + soup = self.soup(markup) + decoded = soup.decode(formatter = lambda x: x.upper()) + # Instead of normal entity conversion code, the custom + # callable is called on every string. + self.assertEqual( + decoded, +- self.document_for(u"BAR
")) ++ self.document_for("BAR
")) + + def test_formatter_is_run_on_attribute_values(self): +- markup = u'e' ++ markup = 'e' + soup = self.soup(markup) + a = soup.a + +- expect_minimal = u'e' ++ expect_minimal = 'e' + + self.assertEqual(expect_minimal, a.decode()) + self.assertEqual(expect_minimal, a.decode(formatter="minimal")) + +- expect_html = u'e' ++ expect_html = 'e' + self.assertEqual(expect_html, a.decode(formatter="html")) + + self.assertEqual(markup, a.decode(formatter=None)) +- expect_upper = u'E' ++ expect_upper = 'E' + self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper())) + + def test_formatter_skips_script_tag_for_html_documents(self): +@@ -1703,7 +1703,7 @@ class TestSubstitutions(SoupTest): + # Everything outside the
 tag is reformatted, but everything
+         # inside is left alone.
+         self.assertEqual(
+-            u'
\n foo\n
  \tbar\n  \n  
\n baz\n \n
', ++ '
\n foo\n
  \tbar\n  \n  
\n baz\n \n
', + soup.div.prettify()) + + def test_prettify_accepts_formatter_function(self): +@@ -1713,14 +1713,14 @@ class TestSubstitutions(SoupTest): + + def test_prettify_outputs_unicode_by_default(self): + soup = self.soup("") +- self.assertEqual(unicode, type(soup.prettify())) ++ self.assertEqual(str, type(soup.prettify())) + + def test_prettify_can_encode_data(self): + soup = self.soup("") + self.assertEqual(bytes, type(soup.prettify("utf-8"))) + + def test_html_entity_substitution_off_by_default(self): +- markup = u"Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!" ++ markup = "Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!" + soup = self.soup(markup) + encoded = soup.b.encode("utf-8") + self.assertEqual(encoded, markup.encode('utf-8')) +@@ -1764,48 +1764,48 @@ class TestEncoding(SoupTest): + """Test the ability to encode objects into strings.""" + + def test_unicode_string_can_be_encoded(self): +- html = u"\N{SNOWMAN}" ++ html = "\N{SNOWMAN}" + soup = self.soup(html) + self.assertEqual(soup.b.string.encode("utf-8"), +- u"\N{SNOWMAN}".encode("utf-8")) ++ "\N{SNOWMAN}".encode("utf-8")) + + def test_tag_containing_unicode_string_can_be_encoded(self): +- html = u"\N{SNOWMAN}" ++ html = "\N{SNOWMAN}" + soup = self.soup(html) + self.assertEqual( + soup.b.encode("utf-8"), html.encode("utf-8")) + + def test_encoding_substitutes_unrecognized_characters_by_default(self): +- html = u"\N{SNOWMAN}" ++ html = "\N{SNOWMAN}" + soup = self.soup(html) + self.assertEqual(soup.b.encode("ascii"), b"") + + def test_encoding_can_be_made_strict(self): +- html = u"\N{SNOWMAN}" ++ html = "\N{SNOWMAN}" + soup = self.soup(html) + self.assertRaises( + UnicodeEncodeError, soup.encode, "ascii", errors="strict") + + def test_decode_contents(self): +- html = u"\N{SNOWMAN}" ++ html = "\N{SNOWMAN}" + soup = self.soup(html) +- self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents()) ++ self.assertEqual("\N{SNOWMAN}", soup.b.decode_contents()) + + def test_encode_contents(self): +- html = u"\N{SNOWMAN}" ++ html = "\N{SNOWMAN}" + soup = self.soup(html) + self.assertEqual( +- u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents( ++ "\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents( + encoding="utf8")) + + def test_deprecated_renderContents(self): +- html = u"\N{SNOWMAN}" ++ html = "\N{SNOWMAN}" + soup = self.soup(html) + self.assertEqual( +- u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents()) ++ "\N{SNOWMAN}".encode("utf8"), soup.b.renderContents()) + + def test_repr(self): +- html = u"\N{SNOWMAN}" ++ html = "\N{SNOWMAN}" + soup = self.soup(html) + if PY3K: + self.assertEqual(html, repr(soup)) +@@ -1993,7 +1993,7 @@ class TestSoupSelector(TreeTest): + els = self.soup.select('title') + self.assertEqual(len(els), 1) + self.assertEqual(els[0].name, 'title') +- self.assertEqual(els[0].contents, [u'The title']) ++ self.assertEqual(els[0].contents, ['The title']) + + def test_one_tag_many(self): + els = self.soup.select('div') +@@ -2039,7 +2039,7 @@ class TestSoupSelector(TreeTest): + self.assertEqual(dashed[0]['id'], 'dash2') + + def test_dashed_tag_text(self): +- self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, u'Hello there.') ++ self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, 'Hello there.') + + def test_select_dashed_matches_find_all(self): + self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag')) +@@ -2225,12 +2225,12 @@ class TestSoupSelector(TreeTest): + # Try to select first paragraph + els = self.soup.select('div#inner 
p:nth-of-type(1)') + self.assertEqual(len(els), 1) +- self.assertEqual(els[0].string, u'Some text') ++ self.assertEqual(els[0].string, 'Some text') + + # Try to select third paragraph + els = self.soup.select('div#inner p:nth-of-type(3)') + self.assertEqual(len(els), 1) +- self.assertEqual(els[0].string, u'Another') ++ self.assertEqual(els[0].string, 'Another') + + # Try to select (non-existent!) fourth paragraph + els = self.soup.select('div#inner p:nth-of-type(4)') +@@ -2243,7 +2243,7 @@ class TestSoupSelector(TreeTest): + def test_nth_of_type_direct_descendant(self): + els = self.soup.select('div#inner > p:nth-of-type(1)') + self.assertEqual(len(els), 1) +- self.assertEqual(els[0].string, u'Some text') ++ self.assertEqual(els[0].string, 'Some text') + + def test_id_child_selector_nth_of_type(self): + self.assertSelects('#inner > p:nth-of-type(2)', ['p1']) +@@ -2324,7 +2324,7 @@ class TestSoupSelector(TreeTest): + markup = '
'
+         soup = BeautifulSoup(markup, 'html.parser')
+         selected = soup.select(".c1, .c2")
+-        self.assertEquals(3, len(selected))
++        self.assertEqual(3, len(selected))
+ 
+         # Verify that find_all finds the same elements, though because
+         # of an implementation detail it finds them in a different
+diff --git a/convert-py3k b/convert-py3k
+deleted file mode 100755
+index 05fab53..0000000
+--- a/convert-py3k
++++ /dev/null
+@@ -1,16 +0,0 @@
+-#!/bin/sh
+-#
+-# The Python 2 source is the definitive source. This script uses 2to3-3.2 to
+-# create a new python3/bs4 source tree that works under Python 3.
+-#
+-# See README.txt to see how to run the test suite after conversion.
+-echo "About to destroy and rebuild the py3k/bs4 directory."
+-echo "If you've got stuff in there, Ctrl-C out of this script or answer 'n'."
+-mkdir -p py3k
+-rm -rfI py3k/bs4
+-cp -r bs4/ py3k/
+-2to3 -w py3k
+-echo ""
+-echo "OK, conversion is done."
+-echo "Now running the unit tests."
+-(cd py3k && python3 -m unittest discover -s bs4)
+\ No newline at end of file
+diff --git a/doc/source/index.rst b/doc/source/index.rst
+index 34ec7cf..b8ca011 100644
+--- a/doc/source/index.rst
++++ b/doc/source/index.rst
+@@ -170,16 +170,13 @@ Installing Beautiful Soup
+ If you're using a recent version of Debian or Ubuntu Linux, you can
+ install Beautiful Soup with the system package manager:
+ 
+-:kbd:`$ apt-get install python-bs4` (for Python 2)
+-
+-:kbd:`$ apt-get install python3-bs4` (for Python 3)
++:kbd:`$ apt-get install python3-bs4`
+ 
+ Beautiful Soup 4 is published through PyPi, so if you can't install it
+ with the system packager, you can install it with ``easy_install`` or
+-``pip``. The package name is ``beautifulsoup4``, and the same package
+-works on Python 2 and Python 3. Make sure you use the right version of
+-``pip`` or ``easy_install`` for your Python version (these may be named
+-``pip3`` and ``easy_install3`` respectively if you're using Python 3).
++``pip``. The package name is ``beautifulsoup4``. Make sure you use the
++right version of ``pip`` or ``easy_install`` for your Python version
++(these may be named ``pip3`` and ``easy_install3`` respectively).
+ 
+ :kbd:`$ easy_install beautifulsoup4`
+ 
+@@ -202,40 +199,8 @@ package the entire library with your application. You can download the
+ tarball, copy its ``bs4`` directory into your application's codebase,
+ and use Beautiful Soup without installing it at all.
+ 
+-I use Python 2.7 and Python 3.8 to develop Beautiful Soup, but it
+-should work with other recent versions.
+-
+-Problems after installation
+----------------------------
+-
+-Beautiful Soup is packaged as Python 2 code. When you install it for
+-use with Python 3, it's automatically converted to Python 3 code. If
+-you don't install the package, the code won't be converted. There have
+-also been reports on Windows machines of the wrong version being
+-installed.
+-
+-If you get the ``ImportError`` "No module named HTMLParser", your
+-problem is that you're running the Python 2 version of the code under
+-Python 3.
+-
+-If you get the ``ImportError`` "No module named html.parser", your
+-problem is that you're running the Python 3 version of the code under
+-Python 2.
+-
+-In both cases, your best bet is to completely remove the Beautiful
+-Soup installation from your system (including any directory created
+-when you unzipped the tarball) and try the installation again.
+-
+-If you get the ``SyntaxError`` "Invalid syntax" on the line
+-``ROOT_TAG_NAME = u'[document]'``, you need to convert the Python 2
+-code to Python 3. You can do this either by installing the package:
+-
+-:kbd:`$ python3 setup.py install`
+-
+-or by manually running Python's ``2to3`` conversion script on the
+-``bs4`` directory:
+-
+-:kbd:`$ 2to3-3.2 -w bs4`
++I use Python 3.8 to develop Beautiful Soup, but it should work with
++other recent versions.
+ 
+ .. _parser-installation:
+ 
+@@ -272,8 +237,7 @@ This table summarizes the advantages and disadvantages of each parser library:
+ +----------------------+--------------------------------------------+--------------------------------+--------------------------+
+ | Python's html.parser | ``BeautifulSoup(markup, "html.parser")``   | * Batteries included           | * Not as fast as lxml,   |
+ |                      |                                            | * Decent speed                 |   less lenient than      |
+-|                      |                                            | * Lenient (As of Python 2.7.3  |   html5lib.              |
+-|                      |                                            |   and 3.2.)                    |                          |
++|                      |                                            | * Lenient (As of Python 3.2)   |   html5lib.              |
+ +----------------------+--------------------------------------------+--------------------------------+--------------------------+
+ | lxml's HTML parser   | ``BeautifulSoup(markup, "lxml")``          | * Very fast                    | * External C dependency  |
+ |                      |                                            | * Lenient                      |                          |
+@@ -289,9 +253,9 @@ This table summarizes the advantages and disadvantages of each parser library:
+ +----------------------+--------------------------------------------+--------------------------------+--------------------------+
+ 
+ If you can, I recommend you install and use lxml for speed. If you're
+-using a very old version of Python -- earlier than 2.7.3 or 3.2.2 --
+-it's `essential` that you install lxml or html5lib. Python's built-in
+-HTML parser is just not very good in those old versions.
++using a very old version of Python -- earlier than 3.2.2 -- it's
++`essential` that you install lxml or html5lib. Python's built-in HTML
++parser is just not very good in those old versions.
+ 
+ Note that if a document is invalid, different parsers will generate
+ different Beautiful Soup trees for it. See `Differences
+@@ -481,8 +445,7 @@ uses the ``NavigableString`` class to contain these bits of text::
+ 
+ A ``NavigableString`` is just like a Python Unicode string, except
+ that it also supports some of the features described in `Navigating
+ the tree`_ and `Searching the tree`_. You can convert a
+-``NavigableString`` to a Unicode string with ``unicode()`` (in
+-Python 2) or ``str`` (in Python 3)::
++``NavigableString`` to a Unicode string with ``str``::
+ 
+     unicode_string = str(tag.string)
+     unicode_string
+@@ -2230,8 +2193,7 @@ Non-pretty printing
+ -------------------
+ 
+ If you just want a string, with no fancy formatting, you can call
+-``str()`` on a ``BeautifulSoup`` object (``unicode()`` in Python 2),
+-or on a ``Tag`` within it::
++``str()`` on a ``BeautifulSoup`` object, or on a ``Tag`` within it::
+ 
+     str(soup)
+     # 'I linked to example.com'
+@@ -3139,10 +3101,10 @@ Version mismatch problems
+ -------------------------
+ 
+ * ``SyntaxError: Invalid syntax`` (on the line ``ROOT_TAG_NAME =
+-  '[document]'``): Caused by running the Python 2 version of
++  '[document]'``): Caused by running an old Python 2 version of
+   Beautiful Soup under Python 3, without converting the code.
+ 
+-* ``ImportError: No module named HTMLParser`` - Caused by running the
++* ``ImportError: No module named HTMLParser`` - Caused by running an old
+   Python 2 version of Beautiful Soup under Python 3.
+ + * ``ImportError: No module named html.parser`` - Caused by running the +diff --git a/setup.py b/setup.py +index 7b4b393..b9b4ed2 100644 +--- a/setup.py ++++ b/setup.py +@@ -4,23 +4,22 @@ from setuptools import ( + ) + import sys + ++from bs4 import __version__ ++ + with open("README.md", "r") as fh: + long_description = fh.read() + + setup( + name="beautifulsoup4", +- # NOTE: We can't import __version__ from bs4 because bs4/__init__.py is Python 2 code, +- # and converting it to Python 3 means going through this code to run 2to3. +- # So we have to specify it twice for the time being. +- version = '4.9.3', ++ version = __version__, + author="Leonard Richardson", + author_email='leonardr@segfault.org', + url="http://www.crummy.com/software/BeautifulSoup/bs4/", + download_url = "http://www.crummy.com/software/BeautifulSoup/bs4/download/", + description="Screen-scraping library", ++ python_requires='>3.0.0', + install_requires=[ +- "soupsieve >1.2; python_version>='3.0'", +- "soupsieve >1.2, <2.0; python_version<'3.0'", ++ "soupsieve >1.2", + ], + long_description=long_description, + long_description_content_type="text/markdown", +@@ -30,12 +29,10 @@ setup( + 'lxml' : [ 'lxml'], + 'html5lib' : ['html5lib'], + }, +- use_2to3 = True, + classifiers=["Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python", +- "Programming Language :: Python :: 2.7", + 'Programming Language :: Python :: 3', + "Topic :: Text Processing :: Markup :: HTML", + "Topic :: Text Processing :: Markup :: XML", +diff --git a/test-all-versions b/test-all-versions +index 01e436b..fe7758a 100755 +--- a/test-all-versions ++++ b/test-all-versions +@@ -1 +1 @@ +-python2.7 -m unittest discover -s bs4 && ./convert-py3k ++python3 -m unittest discover -s bs4 +-- +2.13.7 + diff --git a/python-beautifulsoup4.spec b/python-beautifulsoup4.spec index 976b1f9..21656d9 100644 --- a/python-beautifulsoup4.spec +++ b/python-beautifulsoup4.spec @@ -1,12 +1,15 @@ Name: python-beautifulsoup4 Version: 4.9.3 -Release: 1 +Release: 2 Summary: HTML/XML parser for quick-turnaround projects License: MIT URL: http://www.crummy.com/software/BeautifulSoup/ Source0: https://files.pythonhosted.org/packages/source/b/beautifulsoup4/beautifulsoup4-%{version}.tar.gz BuildArch: noarch BuildRequires: python3-devel python3-html5lib python3-setuptools python3-lxml +BuildRequires: python3-soupsieve + +Patch6000: backport-converts-the-code-base-to-Python-3-and-removes-the-u.patch %global _description\ This package provides a python library which is designed for quick\ @@ -24,19 +27,15 @@ Requires: python3-lxml %prep %setup -q -n beautifulsoup4-%{version} -rm -rf %{py3dir} && cp -a . %{py3dir} +%patch6000 -p1 %build -pushd %{py3dir} -2to3 --write --nobackups . %{py3_build} %install -pushd %{py3dir} %{py3_install} %check -pushd %{py3dir} %{__python3} -m unittest discover -s bs4 || : %files -n python3-beautifulsoup4 @@ -46,6 +45,9 @@ pushd %{py3dir} %{python3_sitelib}/bs4 %changelog +* Mon Jan 10 2022 shixuantong - 4.9.3-2 +- converts the code base to Python 3, and removes the use_2to3 reference in setup.py. + * Mon Jul 26 2021 liusheng - 4.9.3-1 - Upgrade to version 4.9.3
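A quick way to sanity-check the rebuilt package outside of the spec's %check
section is a small import-and-parse script. The following is a minimal
sketch, not part of the upstream test suite; it assumes the patched
python3-beautifulsoup4 package is installed:

#!/usr/bin/python3
# Minimal smoke test for the rebuilt package (a sketch, not part of the
# upstream suite). Assumes python3-beautifulsoup4 is installed; with this
# patch applied, importing bs4 under Python 2 raises ImportError instead.
import bs4
from bs4 import BeautifulSoup

soup = BeautifulSoup("<p>Hello</p>", "html.parser")
assert soup.p.string == "Hello"
print("bs4", bs4.__version__, "imports and parses under Python 3")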