diff --git a/backport-converts-the-code-base-to-Python-3-and-removes-the-u.patch b/backport-converts-the-code-base-to-Python-3-and-removes-the-u.patch deleted file mode 100644 index 1cab095..0000000 --- a/backport-converts-the-code-base-to-Python-3-and-removes-the-u.patch +++ /dev/null @@ -1,2062 +0,0 @@ -diff --git a/README.md b/README.md -index 92dd339..884f9eb 100644 ---- a/README.md -+++ b/README.md -@@ -53,17 +53,11 @@ To go beyond the basics, [comprehensive documentation is available](http://www.c - - # Note on Python 2 sunsetting - --Since 2012, Beautiful Soup has been developed as a Python 2 library --which is automatically converted to Python 3 code as necessary. This --makes it impossible to take advantage of some features of Python --3. -- --For this reason, I plan to discontinue Beautiful Soup's Python 2 --support at some point after December 31, 2020: one year after the --sunset date for Python 2 itself. Beyond that point, new Beautiful Soup --development will exclusively target Python 3. Of course, older --releases of Beautiful Soup, which support both versions, will continue --to be available. -+Beautiful Soup's support for Python 2 was discontinued on December 31, -+2020: one year after the sunset date for Python 2 itself. From this -+point onward, new Beautiful Soup development will exclusively target -+Python 3. The final release of Beautiful Soup 4 to support Python 2 -+was 4.9.3. - - # Supporting the project - -@@ -93,10 +87,5 @@ $ nosetests - ``` - - ``` --$ python -m unittest discover -s bs4 -+$ python3 -m unittest discover -s bs4 - ``` -- --If you checked out the source tree, you should see a script in the --home directory called test-all-versions. This script will run the unit --tests under Python 2, then create a temporary Python 3 conversion of --the source and run the unit tests again under Python 3. -diff --git a/bs4/__init__.py b/bs4/__init__.py -index 8f78809..51ccc21 100644 ---- a/bs4/__init__.py -+++ b/bs4/__init__.py -@@ -7,7 +7,7 @@ Beautiful Soup uses a pluggable XML or HTML parser to parse a - provides methods and Pythonic idioms that make it easy to navigate, - search, and modify the parse tree. - --Beautiful Soup works with Python 2.7 and up. It works better if lxml -+Beautiful Soup works with Python 3.5 and up. It works better if lxml - and/or html5lib is installed. - - For more than you ever wanted to know about Beautiful Soup, see the -@@ -29,6 +29,11 @@ import sys - import traceback - import warnings - -+# The very first thing we do is give a useful error if someone is -+# running this code under Python 2. -+if sys.version_info.major < 3: -+ raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.') -+ - from .builder import builder_registry, ParserRejectedMarkup - from .dammit import UnicodeDammit - from .element import ( -@@ -49,10 +54,6 @@ from .element import ( - TemplateString, - ) - --# The very first thing we do is give a useful error if someone is --# running this code under Python 3 without converting it. --'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' -- - # Define some custom warnings. - class GuessedAtParserWarning(UserWarning): - """The warning issued when BeautifulSoup has to guess what parser to -@@ -100,7 +101,7 @@ class BeautifulSoup(Tag): - # Since BeautifulSoup subclasses Tag, it's possible to treat it as - # a Tag with a .name. This name makes it clear the BeautifulSoup - # object isn't a real markup tag. -- ROOT_TAG_NAME = u'[document]' -+ ROOT_TAG_NAME = '[document]' - - # If the end-user gives no indication which tree builder they - # want, look for one with these features. -@@ -217,7 +218,7 @@ class BeautifulSoup(Tag): - from_encoding = from_encoding or deprecated_argument( - "fromEncoding", "from_encoding") - -- if from_encoding and isinstance(markup, unicode): -+ if from_encoding and isinstance(markup, str): - warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") - from_encoding = None - -@@ -234,7 +235,7 @@ class BeautifulSoup(Tag): - builder_class = builder - builder = None - elif builder is None: -- if isinstance(features, basestring): -+ if isinstance(features, str): - features = [features] - if features is None or len(features) == 0: - features = self.DEFAULT_BUILDER_FEATURES -@@ -309,13 +310,13 @@ class BeautifulSoup(Tag): - markup = markup.read() - elif len(markup) <= 256 and ( - (isinstance(markup, bytes) and not b'<' in markup) -- or (isinstance(markup, unicode) and not u'<' in markup) -+ or (isinstance(markup, str) and not '<' in markup) - ): - # Print out warnings for a couple beginner problems - # involving passing non-markup to Beautiful Soup. - # Beautiful Soup will still parse the input as markup, - # just in case that's what the user really wants. -- if (isinstance(markup, unicode) -+ if (isinstance(markup, str) - and not os.path.supports_unicode_filenames): - possible_filename = markup.encode("utf8") - else: -@@ -323,7 +324,7 @@ class BeautifulSoup(Tag): - is_file = False - try: - is_file = os.path.exists(possible_filename) -- except Exception, e: -+ except Exception as e: - # This is almost certainly a problem involving - # characters not valid in filenames on this - # system. Just let it go. -@@ -353,9 +354,9 @@ class BeautifulSoup(Tag): - pass - - if not success: -- other_exceptions = [unicode(e) for e in rejections] -+ other_exceptions = [str(e) for e in rejections] - raise ParserRejectedMarkup( -- u"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions) -+ "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions) - ) - - # Clear out the markup and remove the builder's circular -@@ -406,9 +407,9 @@ class BeautifulSoup(Tag): - if isinstance(markup, bytes): - space = b' ' - cant_start_with = (b"http:", b"https:") -- elif isinstance(markup, unicode): -- space = u' ' -- cant_start_with = (u"http:", u"https:") -+ elif isinstance(markup, str): -+ space = ' ' -+ cant_start_with = ("http:", "https:") - else: - return - -@@ -545,7 +546,7 @@ class BeautifulSoup(Tag): - containerClass = self.string_container(containerClass) - - if self.current_data: -- current_data = u''.join(self.current_data) -+ current_data = ''.join(self.current_data) - # If whitespace is not preserved, and this string contains - # nothing but ASCII spaces, replace it with a single space - # or newline. -@@ -748,9 +749,9 @@ class BeautifulSoup(Tag): - eventual_encoding = None - if eventual_encoding != None: - encoding_part = ' encoding="%s"' % eventual_encoding -- prefix = u'\n' % encoding_part -+ prefix = '\n' % encoding_part - else: -- prefix = u'' -+ prefix = '' - if not pretty_print: - indent_level = None - else: -@@ -788,4 +789,4 @@ class FeatureNotFound(ValueError): - if __name__ == '__main__': - import sys - soup = BeautifulSoup(sys.stdin) -- print(soup.prettify()) -+ print((soup.prettify())) -diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py -index 03da4c6..03fbd6a 100644 ---- a/bs4/builder/__init__.py -+++ b/bs4/builder/__init__.py -@@ -300,13 +300,13 @@ class TreeBuilder(object): - universal = self.cdata_list_attributes.get('*', []) - tag_specific = self.cdata_list_attributes.get( - tag_name.lower(), None) -- for attr in attrs.keys(): -+ for attr in list(attrs.keys()): - if attr in universal or (tag_specific and attr in tag_specific): - # We have a "class"-type attribute whose string - # value is a whitespace-separated list of - # values. Split it into a list. - value = attrs[attr] -- if isinstance(value, basestring): -+ if isinstance(value, str): - values = nonwhitespace_re.findall(value) - else: - # html5lib sometimes calls setAttributes twice -@@ -496,7 +496,7 @@ class ParserRejectedMarkup(Exception): - """ - if isinstance(message_or_exception, Exception): - e = message_or_exception -- message_or_exception = "%s: %s" % (e.__class__.__name__, unicode(e)) -+ message_or_exception = "%s: %s" % (e.__class__.__name__, str(e)) - super(ParserRejectedMarkup, self).__init__(message_or_exception) - - # Builders are registered in reverse order of priority, so that custom -diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py -index a1c6134..69aefd7 100644 ---- a/bs4/builder/_html5lib.py -+++ b/bs4/builder/_html5lib.py -@@ -33,7 +33,7 @@ try: - # Pre-0.99999999 - from html5lib.treebuilders import _base as treebuilder_base - new_html5lib = False --except ImportError, e: -+except ImportError as e: - # 0.99999999 and up - from html5lib.treebuilders import base as treebuilder_base - new_html5lib = True -@@ -79,7 +79,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): - parser = html5lib.HTMLParser(tree=self.create_treebuilder) - self.underlying_builder.parser = parser - extra_kwargs = dict() -- if not isinstance(markup, unicode): -+ if not isinstance(markup, str): - if new_html5lib: - extra_kwargs['override_encoding'] = self.user_specified_encoding - else: -@@ -87,13 +87,13 @@ class HTML5TreeBuilder(HTMLTreeBuilder): - doc = parser.parse(markup, **extra_kwargs) - - # Set the character encoding detected by the tokenizer. -- if isinstance(markup, unicode): -+ if isinstance(markup, str): - # We need to special-case this because html5lib sets - # charEncoding to UTF-8 if it gets Unicode input. - doc.original_encoding = None - else: - original_encoding = parser.tokenizer.stream.charEncoding[0] -- if not isinstance(original_encoding, basestring): -+ if not isinstance(original_encoding, str): - # In 0.99999999 and up, the encoding is an html5lib - # Encoding object. We want to use a string for compatibility - # with other tree builders. -@@ -110,7 +110,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): - - def test_fragment_to_document(self, fragment): - """See `TreeBuilder`.""" -- return u'
%s' % fragment -+ return '%s' % fragment - - - class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): -@@ -217,7 +217,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): - rv.append("|%s<%s>" % (' ' * indent, name)) - if element.attrs: - attributes = [] -- for name, value in element.attrs.items(): -+ for name, value in list(element.attrs.items()): - if isinstance(name, NamespacedAttribute): - name = "%s %s" % (prefixes[name.namespace], name.name) - if isinstance(value, list): -@@ -272,7 +272,7 @@ class Element(treebuilder_base.Node): - - def appendChild(self, node): - string_child = child = None -- if isinstance(node, basestring): -+ if isinstance(node, str): - # Some other piece of code decided to pass in a string - # instead of creating a TextElement object to contain the - # string. -@@ -289,7 +289,7 @@ class Element(treebuilder_base.Node): - child = node.element - node.parent = self - -- if not isinstance(child, basestring) and child.parent is not None: -+ if not isinstance(child, str) and child.parent is not None: - node.element.extract() - - if (string_child is not None and self.element.contents -@@ -302,7 +302,7 @@ class Element(treebuilder_base.Node): - old_element.replace_with(new_element) - self.soup._most_recent_element = new_element - else: -- if isinstance(node, basestring): -+ if isinstance(node, str): - # Create a brand new NavigableString from this string. - child = self.soup.new_string(node) - -@@ -340,7 +340,7 @@ class Element(treebuilder_base.Node): - - self.soup.builder._replace_cdata_list_attribute_values( - self.name, attributes) -- for name, value in attributes.items(): -+ for name, value in list(attributes.items()): - self.element[name] = value - - # The attributes may contain variables that need substitution. -diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py -index 96a7b7d..88860a9 100644 ---- a/bs4/builder/_htmlparser.py -+++ b/bs4/builder/_htmlparser.py -@@ -8,11 +8,11 @@ __all__ = [ - 'HTMLParserTreeBuilder', - ] - --from HTMLParser import HTMLParser -+from html.parser import HTMLParser - - try: -- from HTMLParser import HTMLParseError --except ImportError, e: -+ from html.parser import HTMLParseError -+except ImportError as e: - # HTMLParseError is removed in Python 3.5. Since it can never be - # thrown in 3.5, we can just define our own class as a placeholder. - class HTMLParseError(Exception): -@@ -219,14 +219,14 @@ class BeautifulSoupHTMLParser(HTMLParser): - continue - try: - data = bytearray([real_name]).decode(encoding) -- except UnicodeDecodeError, e: -+ except UnicodeDecodeError as e: - pass - if not data: - try: -- data = unichr(real_name) -- except (ValueError, OverflowError), e: -+ data = chr(real_name) -+ except (ValueError, OverflowError) as e: - pass -- data = data or u"\N{REPLACEMENT CHARACTER}" -+ data = data or "\N{REPLACEMENT CHARACTER}" - self.handle_data(data) - - def handle_entityref(self, name): -@@ -353,7 +353,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): - document to Unicode and parsing it. Each strategy will be tried - in turn. - """ -- if isinstance(markup, unicode): -+ if isinstance(markup, str): - # Parse Unicode as-is. - yield (markup, None, None, False) - return -@@ -376,7 +376,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): - try: - parser.feed(markup) - parser.close() -- except HTMLParseError, e: -+ except HTMLParseError as e: - warnings.warn(RuntimeWarning( - "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) - raise e -diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py -index 1b44d75..432a2c8 100644 ---- a/bs4/builder/_lxml.py -+++ b/bs4/builder/_lxml.py -@@ -8,11 +8,11 @@ __all__ = [ - - try: - from collections.abc import Callable # Python 3.6 --except ImportError , e: -+except ImportError as e: - from collections import Callable - - from io import BytesIO --from StringIO import StringIO -+from io import StringIO - from lxml import etree - from bs4.element import ( - Comment, -@@ -35,7 +35,7 @@ LXML = 'lxml' - - def _invert(d): - "Invert a dictionary." -- return dict((v,k) for k, v in d.items()) -+ return dict((v,k) for k, v in list(d.items())) - - class LXMLTreeBuilderForXML(TreeBuilder): - DEFAULT_PARSER_CLASS = etree.XMLParser -@@ -81,7 +81,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): - - :param mapping: A dictionary mapping namespace prefixes to URIs. - """ -- for key, value in mapping.items(): -+ for key, value in list(mapping.items()): - if key and key not in self.soup._namespaces: - # Let the BeautifulSoup object know about a new namespace. - # If there are multiple namespaces defined with the same -@@ -169,12 +169,12 @@ class LXMLTreeBuilderForXML(TreeBuilder): - else: - self.processing_instruction_class = XMLProcessingInstruction - -- if isinstance(markup, unicode): -+ if isinstance(markup, str): - # We were given Unicode. Maybe lxml can parse Unicode on - # this system? - yield markup, None, document_declared_encoding, False - -- if isinstance(markup, unicode): -+ if isinstance(markup, str): - # No, apparently not. Convert the Unicode to UTF-8 and - # tell lxml to parse it as UTF-8. - yield (markup.encode("utf8"), "utf8", -@@ -189,7 +189,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): - def feed(self, markup): - if isinstance(markup, bytes): - markup = BytesIO(markup) -- elif isinstance(markup, unicode): -+ elif isinstance(markup, str): - markup = StringIO(markup) - - # Call feed() at least once, even if the markup is empty, -@@ -204,7 +204,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): - if len(data) != 0: - self.parser.feed(data) - self.parser.close() -- except (UnicodeDecodeError, LookupError, etree.ParserError), e: -+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e: - raise ParserRejectedMarkup(e) - - def close(self): -@@ -233,7 +233,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): - # Also treat the namespace mapping as a set of attributes on the - # tag, so we can recreate it later. - attrs = attrs.copy() -- for prefix, namespace in nsmap.items(): -+ for prefix, namespace in list(nsmap.items()): - attribute = NamespacedAttribute( - "xmlns", prefix, "http://www.w3.org/2000/xmlns/") - attrs[attribute] = namespace -@@ -242,7 +242,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): - # from lxml with namespaces attached to their names, and - # turn then into NamespacedAttribute objects. - new_attrs = {} -- for attr, value in attrs.items(): -+ for attr, value in list(attrs.items()): - namespace, attr = self._getNsTag(attr) - if namespace is None: - new_attrs[attr] = value -@@ -302,7 +302,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): - - def test_fragment_to_document(self, fragment): - """See `TreeBuilder`.""" -- return u'\n%s' % fragment -+ return '\n%s' % fragment - - - class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): -@@ -323,10 +323,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): - self.parser = self.parser_for(encoding) - self.parser.feed(markup) - self.parser.close() -- except (UnicodeDecodeError, LookupError, etree.ParserError), e: -+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e: - raise ParserRejectedMarkup(e) - - - def test_fragment_to_document(self, fragment): - """See `TreeBuilder`.""" -- return u'%s' % fragment -+ return '%s' % fragment -diff --git a/bs4/dammit.py b/bs4/dammit.py -index 33f7b7d..ee3708f 100644 ---- a/bs4/dammit.py -+++ b/bs4/dammit.py -@@ -10,7 +10,7 @@ XML or HTML to reflect a new encoding; that's the tree builder's job. - __license__ = "MIT" - - import codecs --from htmlentitydefs import codepoint2name -+from html.entities import codepoint2name - import re - import logging - import string -@@ -22,7 +22,7 @@ try: - # PyPI package: cchardet - import cchardet - def chardet_dammit(s): -- if isinstance(s, unicode): -+ if isinstance(s, str): - return None - return cchardet.detect(s)['encoding'] - except ImportError: -@@ -32,7 +32,7 @@ except ImportError: - # PyPI package: chardet - import chardet - def chardet_dammit(s): -- if isinstance(s, unicode): -+ if isinstance(s, str): - return None - return chardet.detect(s)['encoding'] - #import chardet.constants -@@ -53,14 +53,14 @@ except ImportError: - - # Build bytestring and Unicode versions of regular expressions for finding - # a declared encoding inside an XML or HTML document. --xml_encoding = u'^\\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>' --html_meta = u'<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]' -+xml_encoding = '^\\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>' -+html_meta = '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]' - encoding_res = dict() - encoding_res[bytes] = { - 'html' : re.compile(html_meta.encode("ascii"), re.I), - 'xml' : re.compile(xml_encoding.encode("ascii"), re.I), - } --encoding_res[unicode] = { -+encoding_res[str] = { - 'html' : re.compile(html_meta, re.I), - 'xml' : re.compile(xml_encoding, re.I) - } -@@ -80,7 +80,7 @@ class EntitySubstitution(object): - # entities, but that's a little tricky. - extra = [(39, 'apos')] - for codepoint, name in list(codepoint2name.items()) + extra: -- character = unichr(codepoint) -+ character = chr(codepoint) - if codepoint not in (34, 39): - # There's no point in turning the quotation mark into - # " or the single quote into ', unless it -@@ -323,7 +323,7 @@ class EncodingDetector: - :return: A 2-tuple (modified data, implied encoding) - """ - encoding = None -- if isinstance(data, unicode): -+ if isinstance(data, str): - # Unicode data cannot have a byte-order mark. - return data, encoding - if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ -@@ -370,7 +370,7 @@ class EncodingDetector: - if isinstance(markup, bytes): - res = encoding_res[bytes] - else: -- res = encoding_res[unicode] -+ res = encoding_res[str] - - xml_re = res['xml'] - html_re = res['html'] -@@ -431,9 +431,9 @@ class UnicodeDammit: - markup, override_encodings, is_html, exclude_encodings) - - # Short-circuit if the data is in Unicode to begin with. -- if isinstance(markup, unicode) or markup == '': -+ if isinstance(markup, str) or markup == '': - self.markup = markup -- self.unicode_markup = unicode(markup) -+ self.unicode_markup = str(markup) - self.original_encoding = None - return - -@@ -523,7 +523,7 @@ class UnicodeDammit: - - :param encoding: The name of an encoding. - """ -- return unicode(data, encoding, errors) -+ return str(data, encoding, errors) - - @property - def declared_html_encoding(self): -diff --git a/bs4/diagnose.py b/bs4/diagnose.py -index e4f2f47..500e92d 100644 ---- a/bs4/diagnose.py -+++ b/bs4/diagnose.py -@@ -4,8 +4,8 @@ - __license__ = "MIT" - - import cProfile --from StringIO import StringIO --from HTMLParser import HTMLParser -+from io import StringIO -+from html.parser import HTMLParser - import bs4 - from bs4 import BeautifulSoup, __version__ - from bs4.builder import builder_registry -@@ -25,8 +25,8 @@ def diagnose(data): - :param data: A string containing markup that needs to be explained. - :return: None; diagnostics are printed to standard output. - """ -- print("Diagnostic running on Beautiful Soup %s" % __version__) -- print("Python version %s" % sys.version) -+ print(("Diagnostic running on Beautiful Soup %s" % __version__)) -+ print(("Python version %s" % sys.version)) - - basic_parsers = ["html.parser", "html5lib", "lxml"] - for name in basic_parsers: -@@ -35,16 +35,16 @@ def diagnose(data): - break - else: - basic_parsers.remove(name) -- print( -+ print(( - "I noticed that %s is not installed. Installing it may help." % -- name) -+ name)) - - if 'lxml' in basic_parsers: - basic_parsers.append("lxml-xml") - try: - from lxml import etree -- print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))) -- except ImportError, e: -+ print(("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))) -+ except ImportError as e: - print( - "lxml is not installed or couldn't be imported.") - -@@ -52,21 +52,21 @@ def diagnose(data): - if 'html5lib' in basic_parsers: - try: - import html5lib -- print("Found html5lib version %s" % html5lib.__version__) -- except ImportError, e: -+ print(("Found html5lib version %s" % html5lib.__version__)) -+ except ImportError as e: - print( - "html5lib is not installed or couldn't be imported.") - - if hasattr(data, 'read'): - data = data.read() - elif data.startswith("http:") or data.startswith("https:"): -- print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data) -+ print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)) - print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.") - return - else: - try: - if os.path.exists(data): -- print('"%s" looks like a filename. Reading data from the file.' % data) -+ print(('"%s" looks like a filename. Reading data from the file.' % data)) - with open(data) as fp: - data = fp.read() - except ValueError: -@@ -76,19 +76,19 @@ def diagnose(data): - print("") - - for parser in basic_parsers: -- print("Trying to parse your markup with %s" % parser) -+ print(("Trying to parse your markup with %s" % parser)) - success = False - try: - soup = BeautifulSoup(data, features=parser) - success = True -- except Exception, e: -- print("%s could not parse the markup." % parser) -+ except Exception as e: -+ print(("%s could not parse the markup." % parser)) - traceback.print_exc() - if success: -- print("Here's what %s did with the markup:" % parser) -- print(soup.prettify()) -+ print(("Here's what %s did with the markup:" % parser)) -+ print((soup.prettify())) - -- print("-" * 80) -+ print(("-" * 80)) - - def lxml_trace(data, html=True, **kwargs): - """Print out the lxml events that occur during parsing. -@@ -104,7 +104,7 @@ def lxml_trace(data, html=True, **kwargs): - """ - from lxml import etree - for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): -- print("%s, %4s, %s" % (event, element.tag, element.text)) -+ print(("%s, %4s, %s" % (event, element.tag, element.text))) - - class AnnouncingParser(HTMLParser): - """Subclass of HTMLParser that announces parse events, without doing -@@ -193,9 +193,9 @@ def rdoc(num_elements=1000): - - def benchmark_parsers(num_elements=100000): - """Very basic head-to-head performance benchmark.""" -- print("Comparative parser benchmark on Beautiful Soup %s" % __version__) -+ print(("Comparative parser benchmark on Beautiful Soup %s" % __version__)) - data = rdoc(num_elements) -- print("Generated a large invalid HTML document (%d bytes)." % len(data)) -+ print(("Generated a large invalid HTML document (%d bytes)." % len(data))) - - for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: - success = False -@@ -204,24 +204,24 @@ def benchmark_parsers(num_elements=100000): - soup = BeautifulSoup(data, parser) - b = time.time() - success = True -- except Exception, e: -- print("%s could not parse the markup." % parser) -+ except Exception as e: -+ print(("%s could not parse the markup." % parser)) - traceback.print_exc() - if success: -- print("BS4+%s parsed the markup in %.2fs." % (parser, b-a)) -+ print(("BS4+%s parsed the markup in %.2fs." % (parser, b-a))) - - from lxml import etree - a = time.time() - etree.HTML(data) - b = time.time() -- print("Raw lxml parsed the markup in %.2fs." % (b-a)) -+ print(("Raw lxml parsed the markup in %.2fs." % (b-a))) - - import html5lib - parser = html5lib.HTMLParser() - a = time.time() - parser.parse(data) - b = time.time() -- print("Raw html5lib parsed the markup in %.2fs." % (b-a)) -+ print(("Raw html5lib parsed the markup in %.2fs." % (b-a))) - - def profile(num_elements=100000, parser="lxml"): - """Use Python's profiler on a randomly generated document.""" -diff --git a/bs4/element.py b/bs4/element.py -index 09a81d9..81d9db9 100644 ---- a/bs4/element.py -+++ b/bs4/element.py -@@ -3,14 +3,14 @@ __license__ = "MIT" - - try: - from collections.abc import Callable # Python 3.6 --except ImportError , e: -+except ImportError as e: - from collections import Callable - import re - import sys - import warnings - try: - import soupsieve --except ImportError, e: -+except ImportError as e: - soupsieve = None - warnings.warn( - 'The soupsieve package is not installed. CSS selectors cannot be used.' -@@ -57,22 +57,22 @@ def _alias(attr): - # Source: - # https://docs.python.org/3/library/codecs.html#python-specific-encodings - PYTHON_SPECIFIC_ENCODINGS = set([ -- u"idna", -- u"mbcs", -- u"oem", -- u"palmos", -- u"punycode", -- u"raw_unicode_escape", -- u"undefined", -- u"unicode_escape", -- u"raw-unicode-escape", -- u"unicode-escape", -- u"string-escape", -- u"string_escape", -+ "idna", -+ "mbcs", -+ "oem", -+ "palmos", -+ "punycode", -+ "raw_unicode_escape", -+ "undefined", -+ "unicode_escape", -+ "raw-unicode-escape", -+ "unicode-escape", -+ "string-escape", -+ "string_escape", - ]) - - --class NamespacedAttribute(unicode): -+class NamespacedAttribute(str): - """A namespaced string (e.g. 'xml:lang') that remembers the namespace - ('xml') and the name ('lang') that were used to create it. - """ -@@ -84,18 +84,18 @@ class NamespacedAttribute(unicode): - name = None - - if name is None: -- obj = unicode.__new__(cls, prefix) -+ obj = str.__new__(cls, prefix) - elif prefix is None: - # Not really namespaced. -- obj = unicode.__new__(cls, name) -+ obj = str.__new__(cls, name) - else: -- obj = unicode.__new__(cls, prefix + ":" + name) -+ obj = str.__new__(cls, prefix + ":" + name) - obj.prefix = prefix - obj.name = name - obj.namespace = namespace - return obj - --class AttributeValueWithCharsetSubstitution(unicode): -+class AttributeValueWithCharsetSubstitution(str): - """A stand-in object for a character encoding specified in HTML.""" - - class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): -@@ -106,7 +106,7 @@ class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): - """ - - def __new__(cls, original_value): -- obj = unicode.__new__(cls, original_value) -+ obj = str.__new__(cls, original_value) - obj.original_value = original_value - return obj - -@@ -134,9 +134,9 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): - match = cls.CHARSET_RE.search(original_value) - if match is None: - # No substitution necessary. -- return unicode.__new__(unicode, original_value) -+ return str.__new__(str, original_value) - -- obj = unicode.__new__(cls, original_value) -+ obj = str.__new__(cls, original_value) - obj.original_value = original_value - return obj - -@@ -376,7 +376,7 @@ class PageElement(object): - raise ValueError("Cannot insert None into a tag.") - if new_child is self: - raise ValueError("Cannot insert a tag into itself.") -- if (isinstance(new_child, basestring) -+ if (isinstance(new_child, str) - and not isinstance(new_child, NavigableString)): - new_child = NavigableString(new_child) - -@@ -753,7 +753,7 @@ class PageElement(object): - result = (element for element in generator - if isinstance(element, Tag)) - return ResultSet(strainer, result) -- elif isinstance(name, basestring): -+ elif isinstance(name, str): - # Optimization to find all tags with a given name. - if name.count(':') == 1: - # This is a name with a prefix. If this is a namespace-aware document, -@@ -872,7 +872,7 @@ class PageElement(object): - return self.parents - - --class NavigableString(unicode, PageElement): -+class NavigableString(str, PageElement): - """A Python Unicode string that is part of a parse tree. - - When Beautiful Soup parses the markup penguin, it will -@@ -895,10 +895,10 @@ class NavigableString(unicode, PageElement): - passed in to the superclass's __new__ or the superclass won't know - how to handle non-ASCII characters. - """ -- if isinstance(value, unicode): -- u = unicode.__new__(cls, value) -+ if isinstance(value, str): -+ u = str.__new__(cls, value) - else: -- u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) -+ u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) - u.setup() - return u - -@@ -909,7 +909,7 @@ class NavigableString(unicode, PageElement): - return type(self)(self) - - def __getnewargs__(self): -- return (unicode(self),) -+ return (str(self),) - - def __getattr__(self, attr): - """text.string gives you text. This is for backwards -@@ -975,30 +975,30 @@ class PreformattedString(NavigableString): - - class CData(PreformattedString): - """A CDATA block.""" -- PREFIX = u'' -+ PREFIX = '' - - class ProcessingInstruction(PreformattedString): - """A SGML processing instruction.""" - -- PREFIX = u'' -- SUFFIX = u'>' -+ PREFIX = '' -+ SUFFIX = '>' - - class XMLProcessingInstruction(ProcessingInstruction): - """An XML processing instruction.""" -- PREFIX = u'' -- SUFFIX = u'?>' -+ PREFIX = '' -+ SUFFIX = '?>' - - class Comment(PreformattedString): - """An HTML or XML comment.""" -- PREFIX = u'' -+ PREFIX = '' - - - class Declaration(PreformattedString): - """An XML declaration.""" -- PREFIX = u'' -- SUFFIX = u'?>' -+ PREFIX = '' -+ SUFFIX = '?>' - - - class Doctype(PreformattedString): -@@ -1026,8 +1026,8 @@ class Doctype(PreformattedString): - - return Doctype(value) - -- PREFIX = u'\n' -+ PREFIX = '\n' - - - class Stylesheet(NavigableString): -@@ -1263,7 +1263,7 @@ class Tag(PageElement): - for string in self._all_strings(True): - yield string - -- def get_text(self, separator=u"", strip=False, -+ def get_text(self, separator="", strip=False, - types=(NavigableString, CData)): - """Get all child strings, concatenated using the given separator. - -@@ -1416,7 +1416,7 @@ class Tag(PageElement): - def __contains__(self, x): - return x in self.contents - -- def __nonzero__(self): -+ def __bool__(self): - "A tag is non-None even if it has no contents." - return True - -@@ -1565,8 +1565,8 @@ class Tag(PageElement): - else: - if isinstance(val, list) or isinstance(val, tuple): - val = ' '.join(val) -- elif not isinstance(val, basestring): -- val = unicode(val) -+ elif not isinstance(val, str): -+ val = str(val) - elif ( - isinstance(val, AttributeValueWithCharsetSubstitution) - and eventual_encoding is not None -@@ -1575,7 +1575,7 @@ class Tag(PageElement): - - text = formatter.attribute_value(val) - decoded = ( -- unicode(key) + '=' -+ str(key) + '=' - + formatter.quoted_attribute_value(text)) - attrs.append(decoded) - close = '' -@@ -1934,7 +1934,7 @@ class SoupStrainer(object): - else: - attrs = kwargs - normalized_attrs = {} -- for key, value in attrs.items(): -+ for key, value in list(attrs.items()): - normalized_attrs[key] = self._normalize_search_value(value) - - self.attrs = normalized_attrs -@@ -1943,7 +1943,7 @@ class SoupStrainer(object): - def _normalize_search_value(self, value): - # Leave it alone if it's a Unicode string, a callable, a - # regular expression, a boolean, or None. -- if (isinstance(value, unicode) or isinstance(value, Callable) or hasattr(value, 'match') -+ if (isinstance(value, str) or isinstance(value, Callable) or hasattr(value, 'match') - or isinstance(value, bool) or value is None): - return value - -@@ -1956,7 +1956,7 @@ class SoupStrainer(object): - new_value = [] - for v in value: - if (hasattr(v, '__iter__') and not isinstance(v, bytes) -- and not isinstance(v, unicode)): -+ and not isinstance(v, str)): - # This is almost certainly the user's mistake. In the - # interests of avoiding infinite loops, we'll let - # it through as-is rather than doing a recursive call. -@@ -1968,7 +1968,7 @@ class SoupStrainer(object): - # Otherwise, convert it into a Unicode string. - # The unicode(str()) thing is so this will do the same thing on Python 2 - # and Python 3. -- return unicode(str(value)) -+ return str(str(value)) - - def __str__(self): - """A human-readable representation of this SoupStrainer.""" -@@ -1996,7 +1996,7 @@ class SoupStrainer(object): - markup = markup_name - markup_attrs = markup - -- if isinstance(self.name, basestring): -+ if isinstance(self.name, str): - # Optimization for a very common case where the user is - # searching for a tag with one specific name, and we're - # looking at a tag with a different name. -@@ -2052,7 +2052,7 @@ class SoupStrainer(object): - found = None - # If given a list of items, scan it for a text element that - # matches. -- if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)): -+ if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)): - for element in markup: - if isinstance(element, NavigableString) \ - and self.search(element): -@@ -2065,7 +2065,7 @@ class SoupStrainer(object): - found = self.search_tag(markup) - # If it's text, make sure the text matches. - elif isinstance(markup, NavigableString) or \ -- isinstance(markup, basestring): -+ isinstance(markup, str): - if not self.name and not self.attrs and self._matches(markup, self.text): - found = markup - else: -@@ -2110,7 +2110,7 @@ class SoupStrainer(object): - return not match_against - - if (hasattr(match_against, '__iter__') -- and not isinstance(match_against, basestring)): -+ and not isinstance(match_against, str)): - # We're asked to match against an iterable of items. - # The markup must be match at least one item in the - # iterable. We'll try each one in turn. -@@ -2137,7 +2137,7 @@ class SoupStrainer(object): - # the tag's name and once against its prefixed name. - match = False - -- if not match and isinstance(match_against, unicode): -+ if not match and isinstance(match_against, str): - # Exact string match - match = markup == match_against - -diff --git a/bs4/formatter.py b/bs4/formatter.py -index 9a692ec..2cbab4c 100644 ---- a/bs4/formatter.py -+++ b/bs4/formatter.py -@@ -83,7 +83,7 @@ class Formatter(EntitySubstitution): - """ - if not self.entity_substitution: - return ns -- from element import NavigableString -+ from .element import NavigableString - if (isinstance(ns, NavigableString) - and ns.parent is not None - and ns.parent.name in self.cdata_containing_tags): -diff --git a/bs4/testing.py b/bs4/testing.py -index a2f83a1..9ca507b 100644 ---- a/bs4/testing.py -+++ b/bs4/testing.py -@@ -25,7 +25,7 @@ from bs4.element import ( - from bs4.builder import HTMLParserTreeBuilder - default_builder = HTMLParserTreeBuilder - --BAD_DOCUMENT = u"""A bare string -+BAD_DOCUMENT = """A bare string - - - -@@ -94,7 +94,7 @@ class SoupTest(unittest.TestCase): - # Verify that every tag that was opened was eventually closed. - - # There are no tags in the open tag counter. -- assert all(v==0 for v in obj.open_tag_counter.values()) -+ assert all(v==0 for v in list(obj.open_tag_counter.values())) - - # The only tag in the tag stack is the one for the root - # document. -@@ -372,7 +372,7 @@ class HTMLTreeBuilderSmokeTest(object): - # process_markup correctly sets processing_instruction_class - # even when the markup is already Unicode and there is no - # need to process anything. -- markup = u"""""" -+ markup = """""" - soup = self.soup(markup) - self.assertEqual(markup, soup.decode()) - -@@ -544,14 +544,14 @@ Hello, world! - # "&T" and "&p" look like incomplete character entities, but they are - # not. - self.assertSoupEquals( -- u"• AT&T is in the s&p 500
", -- u"\u2022 AT&T is in the s&p 500
" -+ "• AT&T is in the s&p 500
", -+ "\u2022 AT&T is in the s&p 500
" - ) - - def test_apos_entity(self): - self.assertSoupEquals( -- u"Bob's Bar
", -- u"Bob's Bar
", -+ "Bob's Bar
", -+ "Bob's Bar
", - ) - - def test_entities_in_foreign_document_encoding(self): -@@ -564,17 +564,17 @@ Hello, world! - # characters. - markup = "Hello -☃
" - soup = self.soup(markup) -- self.assertEquals(u"“Hello” -☃", soup.p.string) -+ self.assertEqual("“Hello” -☃", soup.p.string) - - def test_entities_in_attributes_converted_to_unicode(self): -- expect = u'' -+ expect = '' - self.assertSoupEquals('', expect) - self.assertSoupEquals('', expect) - self.assertSoupEquals('', expect) - self.assertSoupEquals('', expect) - - def test_entities_in_text_converted_to_unicode(self): -- expect = u'pi\N{LATIN SMALL LETTER N WITH TILDE}ata
' -+ expect = 'pi\N{LATIN SMALL LETTER N WITH TILDE}ata
' - self.assertSoupEquals("piñata
", expect) - self.assertSoupEquals("piñata
", expect) - self.assertSoupEquals("piñata
", expect) -@@ -585,7 +585,7 @@ Hello, world! - 'I said "good day!"
') - - def test_out_of_range_entity(self): -- expect = u"\N{REPLACEMENT CHARACTER}" -+ expect = "\N{REPLACEMENT CHARACTER}" - self.assertSoupEquals("", expect) - self.assertSoupEquals("", expect) - self.assertSoupEquals("", expect) -@@ -663,9 +663,9 @@ Hello, world! - # A seemingly innocuous document... but it's in Unicode! And - # it contains characters that can't be represented in the - # encoding found in the declaration! The horror! -- markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' -+ markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' - soup = self.soup(markup) -- self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string) -+ self.assertEqual('Sacr\xe9 bleu!', soup.body.string) - - def test_soupstrainer(self): - """Parsers should be able to work with SoupStrainers.""" -@@ -705,7 +705,7 @@ Hello, world! - # Both XML and HTML entities are converted to Unicode characters - # during parsing. - text = "<<sacré bleu!>>
" -- expected = u"<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>
" -+ expected = "<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>
" - self.assertSoupEquals(text, expected) - - def test_smart_quotes_converted_on_the_way_in(self): -@@ -715,15 +715,15 @@ Hello, world! - soup = self.soup(quote) - self.assertEqual( - soup.p.string, -- u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") -+ "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") - - def test_non_breaking_spaces_converted_on_the_way_in(self): - soup = self.soup(" ") -- self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2) -+ self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2) - - def test_entities_converted_on_the_way_out(self): - text = "<<sacré bleu!>>
" -- expected = u"<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>
".encode("utf-8") -+ expected = "<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>
".encode("utf-8") - soup = self.soup(text) - self.assertEqual(soup.p.encode("utf-8"), expected) - -@@ -732,7 +732,7 @@ Hello, world! - # easy-to-understand document. - - # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. -- unicode_html = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!
' -+ unicode_html = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!
' - - # That's because we're going to encode it into ISO-Latin-1, and use - # that to test. -@@ -848,8 +848,8 @@ Hello, world! - soup = self.soup(markup) - for encoding in PYTHON_SPECIFIC_ENCODINGS: - if encoding in ( -- u'idna', u'mbcs', u'oem', u'undefined', -- u'string_escape', u'string-escape' -+ 'idna', 'mbcs', 'oem', 'undefined', -+ 'string_escape', 'string-escape' - ): - # For one reason or another, these will raise an - # exception if we actually try to use them, so don't -@@ -910,8 +910,8 @@ class XMLTreeBuilderSmokeTest(object): - soup = self.soup(markup) - for encoding in PYTHON_SPECIFIC_ENCODINGS: - if encoding in ( -- u'idna', u'mbcs', u'oem', u'undefined', -- u'string_escape', u'string-escape' -+ 'idna', 'mbcs', 'oem', 'undefined', -+ 'string_escape', 'string-escape' - ): - # For one reason or another, these will raise an - # exception if we actually try to use them, so don't -@@ -962,15 +962,15 @@ class XMLTreeBuilderSmokeTest(object): - self.assertTrue(b"< < hey > >" in encoded) - - def test_can_parse_unicode_document(self): -- markup = u'foo
\n' - soup = self.soup(markup) -- self.assertEqual(u"foo
\n", soup.body.decode()) -+ self.assertEqual("foo
\n", soup.body.decode()) - self.assertEqual(2, len(soup.find_all('p'))) - - - def test_reparented_markup_ends_with_whitespace(self): - markup = 'foo
\n\n' - soup = self.soup(markup) -- self.assertEqual(u"foo
\n\n", soup.body.decode()) -+ self.assertEqual("foo
\n\n", soup.body.decode()) - self.assertEqual(2, len(soup.find_all('p'))) - - def test_reparented_markup_containing_identical_whitespace_nodes(self): -@@ -127,7 +127,7 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): - def test_foster_parenting(self): - markup = b"""A"""
- soup = self.soup(markup)
-- self.assertEqual(u"Aéé" -+ data = "éé" - soup = self.soup(data) -- self.assertEqual(u"éé", soup.h1.string) -+ self.assertEqual("éé", soup.h1.string) - - def test_embedded_null(self): -- data = u"foo\0bar" -+ data = "foo\0bar" - soup = self.soup(data) -- self.assertEqual(u"foo\0bar", soup.h1.string) -+ self.assertEqual("foo\0bar", soup.h1.string) - - def test_exclude_encodings(self): -- utf8_data = u"Räksmörgås".encode("utf-8") -+ utf8_data = "Räksmörgås".encode("utf-8") - soup = self.soup(utf8_data, exclude_encodings=["utf-8"]) - self.assertEqual("windows-1252", soup.original_encoding) - -@@ -127,7 +127,7 @@ class TestConstructor(SoupTest): - yield markup, None, None, False - - import re -- self.assertRaisesRegexp( -+ self.assertRaisesRegex( - ParserRejectedMarkup, - "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.", - BeautifulSoup, '', builder=Mock, -@@ -303,7 +303,7 @@ class TestWarnings(SoupTest): - with warnings.catch_warnings(record=True) as warning_list: - # note - this url must differ from the bytes one otherwise - # python's warnings system swallows the second warning -- soup = self.soup(u"http://www.crummyunicode.com/") -+ soup = self.soup("http://www.crummyunicode.com/") - warning = self._assert_warning( - warning_list, MarkupResemblesLocatorWarning - ) -@@ -319,7 +319,7 @@ class TestWarnings(SoupTest): - - def test_url_warning_with_unicode_and_space(self): - with warnings.catch_warnings(record=True) as warning_list: -- soup = self.soup(u"http://www.crummyuncode.com/ is great") -+ soup = self.soup("http://www.crummyuncode.com/ is great") - self.assertFalse(any("looks like a URL" in str(w.message) - for w in warning_list)) - -@@ -341,9 +341,9 @@ class TestEntitySubstitution(unittest.TestCase): - def test_simple_html_substitution(self): - # Unicode characters corresponding to named HTML entites - # are substituted, and no others. -- s = u"foo\u2200\N{SNOWMAN}\u00f5bar" -+ s = "foo\u2200\N{SNOWMAN}\u00f5bar" - self.assertEqual(self.sub.substitute_html(s), -- u"foo∀\N{SNOWMAN}õbar") -+ "foo∀\N{SNOWMAN}õbar") - - def test_smart_quote_substitution(self): - # MS smart quotes are a common source of frustration, so we -@@ -408,7 +408,7 @@ class TestEncodingConversion(SoupTest): - - def setUp(self): - super(TestEncodingConversion, self).setUp() -- self.unicode_data = u'Räksmörgås') -- self.assertEqual(soup.find(string=u'Räksmörgås'), u'Räksmörgås') -+ soup = self.soup('Räksmörgås') -+ self.assertEqual(soup.find(string='Räksmörgås'), 'Räksmörgås') - - def test_unicode_attribute_find(self): -- soup = self.soup(u'here it is') -+ soup = self.soup('here it is') - str(soup) -- self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text) -+ self.assertEqual("here it is", soup.find(id='Räksmörgås').text) - - - def test_find_everything(self): -@@ -101,17 +101,17 @@ class TestFindAll(TreeTest): - """You can search the tree for text nodes.""" - soup = self.soup("Foobar\xbb") - # Exact match. -- self.assertEqual(soup.find_all(string="bar"), [u"bar"]) -- self.assertEqual(soup.find_all(text="bar"), [u"bar"]) -+ self.assertEqual(soup.find_all(string="bar"), ["bar"]) -+ self.assertEqual(soup.find_all(text="bar"), ["bar"]) - # Match any of a number of strings. - self.assertEqual( -- soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"]) -+ soup.find_all(text=["Foo", "bar"]), ["Foo", "bar"]) - # Match a regular expression. - self.assertEqual(soup.find_all(text=re.compile('.*')), -- [u"Foo", u"bar", u'\xbb']) -+ ["Foo", "bar", '\xbb']) - # Match anything. - self.assertEqual(soup.find_all(text=True), -- [u"Foo", u"bar", u'\xbb']) -+ ["Foo", "bar", '\xbb']) - - def test_find_all_limit(self): - """You can limit the number of items returned by find_all.""" -@@ -254,8 +254,8 @@ class TestFindAllByAttribute(TreeTest): - ["Matching a.", "Matching b."]) - - def test_find_all_by_utf8_attribute_value(self): -- peace = u"םולש".encode("utf8") -- data = u''.encode("utf8") -+ peace = "םולש".encode("utf8") -+ data = ''.encode("utf8") - soup = self.soup(data) - self.assertEqual([soup.a], soup.find_all(title=peace)) - self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8"))) -@@ -444,7 +444,7 @@ class TestSmooth(TreeTest): - # output. - - # Since the tag has two children, its .string is None. -- self.assertEquals(None, div.span.string) -+ self.assertEqual(None, div.span.string) - - self.assertEqual(7, len(div.contents)) - div.smooth() -@@ -755,18 +755,18 @@ class TestTag(SoupTest): - - # No list of whitespace-preserving tags -> pretty-print - tag._preserve_whitespace_tags = None -- self.assertEquals(True, tag._should_pretty_print(0)) -+ self.assertEqual(True, tag._should_pretty_print(0)) - - # List exists but tag is not on the list -> pretty-print - tag.preserve_whitespace_tags = ["some_other_tag"] -- self.assertEquals(True, tag._should_pretty_print(1)) -+ self.assertEqual(True, tag._should_pretty_print(1)) - - # Indent level is None -> don't pretty-print -- self.assertEquals(False, tag._should_pretty_print(None)) -+ self.assertEqual(False, tag._should_pretty_print(None)) - - # Tag is on the whitespace-preserving list -> don't pretty-print - tag.preserve_whitespace_tags = ["some_other_tag", "a_tag"] -- self.assertEquals(False, tag._should_pretty_print(1)) -+ self.assertEqual(False, tag._should_pretty_print(1)) - - - class TestTagCreation(SoupTest): -@@ -905,10 +905,10 @@ class TestTreeModification(SoupTest): - assert not isinstance(i, BeautifulSoup) - - p1, p2, p3, p4 = list(soup.children) -- self.assertEquals("And now, a word:", p1.string) -- self.assertEquals("p2", p2.string) -- self.assertEquals("p3", p3.string) -- self.assertEquals("And we're back.", p4.string) -+ self.assertEqual("And now, a word:", p1.string) -+ self.assertEqual("p2", p2.string) -+ self.assertEqual("p3", p3.string) -+ self.assertEqual("And we're back.", p4.string) - - - def test_replace_with_maintains_next_element_throughout(self): -@@ -1015,8 +1015,8 @@ class TestTreeModification(SoupTest): - d1 = soup.find('div', id='d1') - d2 = soup.find('div', id='d2') - d2.extend(d1) -- self.assertEqual(u'', d1.decode()) -- self.assertEqual(u'', d2.decode()) -+ self.assertEqual('', d1.decode()) -+ self.assertEqual('', d2.decode()) - - def test_move_tag_to_beginning_of_parent(self): - data = "', 'html.parser') - encoding = soup.original_encoding - copy = soup.__copy__() -- self.assertEqual(u" ", unicode(copy)) -+ self.assertEqual(" ", str(copy)) - self.assertEqual(encoding, copy.original_encoding) - - def test_copy_preserves_builder_information(self): -@@ -1554,14 +1554,14 @@ class TestPersistence(SoupTest): - - def test_unicode_pickle(self): - # A tree containing Unicode characters can be pickled. -- html = u"\N{SNOWMAN}" -+ html = "\N{SNOWMAN}" - soup = self.soup(html) - dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) - loaded = pickle.loads(dumped) - self.assertEqual(loaded.decode(), soup.decode()) - - def test_copy_navigablestring_is_not_attached_to_tree(self): -- html = u"FooBar" -+ html = "FooBar" - soup = self.soup(html) - s1 = soup.find(string="Foo") - s2 = copy.copy(s1) -@@ -1573,7 +1573,7 @@ class TestPersistence(SoupTest): - self.assertEqual(None, s2.previous_element) - - def test_copy_navigablestring_subclass_has_same_type(self): -- html = u"" -+ html = "" - soup = self.soup(html) - s1 = soup.string - s2 = copy.copy(s1) -@@ -1581,19 +1581,19 @@ class TestPersistence(SoupTest): - self.assertTrue(isinstance(s2, Comment)) - - def test_copy_entire_soup(self): -- html = u"end" -+ html = "end" - soup = self.soup(html) - soup_copy = copy.copy(soup) - self.assertEqual(soup, soup_copy) - - def test_copy_tag_copies_contents(self): -- html = u"end" -+ html = "end" - soup = self.soup(html) - div = soup.div - div_copy = copy.copy(div) - - # The two tags look the same, and evaluate to equal. -- self.assertEqual(unicode(div), unicode(div_copy)) -+ self.assertEqual(str(div), str(div_copy)) - self.assertEqual(div, div_copy) - - # But they're not the same object. -@@ -1609,17 +1609,17 @@ class TestPersistence(SoupTest): - class TestSubstitutions(SoupTest): - - def test_default_formatter_is_minimal(self): -- markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" -+ markup = "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="minimal") - # The < is converted back into < but the e-with-acute is left alone. - self.assertEqual( - decoded, - self.document_for( -- u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) -+ "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) - - def test_formatter_html(self): -- markup = u" <<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" -+ markup = " <<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="html") - self.assertEqual( -@@ -1627,7 +1627,7 @@ class TestSubstitutions(SoupTest): - self.document_for(" <<Sacré bleu!>>")) - - def test_formatter_html5(self): -- markup = u" <<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" -+ markup = " <<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="html5") - self.assertEqual( -@@ -1635,49 +1635,49 @@ class TestSubstitutions(SoupTest): - self.document_for(" <<Sacré bleu!>>")) - - def test_formatter_minimal(self): -- markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" -+ markup = "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="minimal") - # The < is converted back into < but the e-with-acute is left alone. - self.assertEqual( - decoded, - self.document_for( -- u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) -+ "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) - - def test_formatter_null(self): -- markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" -+ markup = "<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter=None) - # Neither the angle brackets nor the e-with-acute are converted. - # This is not valid HTML, but it's what the user wanted. - self.assertEqual(decoded, -- self.document_for(u"< " -+ markup = "<foo>bar " - soup = self.soup(markup) - decoded = soup.decode(formatter = lambda x: x.upper()) - # Instead of normal entity conversion code, the custom - # callable is called on every string. - self.assertEqual( - decoded, -- self.document_for(u" ")) -+ self.document_for(" ")) - - def test_formatter_is_run_on_attribute_values(self): -- markup = u'e' -+ markup = 'e' - soup = self.soup(markup) - a = soup.a - -- expect_minimal = u'e' -+ expect_minimal = 'e' - - self.assertEqual(expect_minimal, a.decode()) - self.assertEqual(expect_minimal, a.decode(formatter="minimal")) - -- expect_html = u'e' -+ expect_html = 'e' - self.assertEqual(expect_html, a.decode(formatter="html")) - - self.assertEqual(markup, a.decode(formatter=None)) -- expect_upper = u'E' -+ expect_upper = 'E' - self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper())) - - def test_formatter_skips_script_tag_for_html_documents(self): -@@ -1703,7 +1703,7 @@ class TestSubstitutions(SoupTest): - # Everything outside the tag is reformatted, but everything - # inside is left alone. - self.assertEqual( -- u' |