From ee4dd2756362df1708298ff435e898b83a2d02a4 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Mon, 11 Sep 2023 17:06:44 +0300 Subject: [PATCH] Fix(Py3): Replace `unicode()` with `.decode()` --- bkmk_parser.py | 2 +- parse_html/bkmk_parse_html.py | 6 +++--- parse_html/bkmk_ph_beautifulsoup.py | 2 +- parse_html/bkmk_ph_beautifulsoup4.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bkmk_parser.py b/bkmk_parser.py index 420d20f..42c0712 100644 --- a/bkmk_parser.py +++ b/bkmk_parser.py @@ -48,7 +48,7 @@ class BkmkParser(HTMLParser): def handle_data(self, data): if data: if self.charset and default_encoding: - data = unicode(data, self.charset, "replace").\ + data = data.decode(self.charset, "replace").\ encode(default_encoding, "xmlcharrefreplace") self.accumulator += data diff --git a/parse_html/bkmk_parse_html.py b/parse_html/bkmk_parse_html.py index 6afb7f9..997bf64 100644 --- a/parse_html/bkmk_parse_html.py +++ b/parse_html/bkmk_parse_html.py @@ -167,7 +167,7 @@ def parse_html(html_text, charset=None, log=None): converted_title = title = parser.title if title and (not parser.charset): try: - unicode(title, "ascii") + title.decode("ascii") except UnicodeDecodeError: parser.charset = DEFAULT_CHARSET @@ -186,7 +186,7 @@ def parse_html(html_text, charset=None, log=None): if log: log(" title : %s" % title) if parser.charset != universal_charset: try: - converted_title = unicode(title, parser.charset).\ + converted_title = title.decode(parser.charset).\ encode(universal_charset) except UnicodeError: if log: @@ -194,7 +194,7 @@ def parse_html(html_text, charset=None, log=None): "converting from %s" % (parser.charset, DEFAULT_CHARSET)) converted_title = \ - unicode(title, DEFAULT_CHARSET, "replace").\ + title.decode(DEFAULT_CHARSET, "replace").\ encode(universal_charset, "replace") parser.charset = DEFAULT_CHARSET if log and (converted_title != title): diff --git a/parse_html/bkmk_ph_beautifulsoup.py b/parse_html/bkmk_ph_beautifulsoup.py index f2f042e..e29b499 100644 --- a/parse_html/bkmk_ph_beautifulsoup.py +++ b/parse_html/bkmk_ph_beautifulsoup.py @@ -96,7 +96,7 @@ def parse_html(html_text, charset=None, log=None): parts = [] for part in title: if not isinstance(part, basestring): - part = unicode(part) + part = part.decode() parts.append(part.strip()) title = ''.join(parts) diff --git a/parse_html/bkmk_ph_beautifulsoup4.py b/parse_html/bkmk_ph_beautifulsoup4.py index e1662ed..1f77eb7 100644 --- a/parse_html/bkmk_ph_beautifulsoup4.py +++ b/parse_html/bkmk_ph_beautifulsoup4.py @@ -55,7 +55,7 @@ def parse_html(html_text, charset=None, log=None): parts = [] for part in title: if not isinstance(part, basestring): - part = unicode(part) + part = part.decode() parts.append(part.strip()) title = ''.join(parts) -- 2.39.2