From: Oleg Broytman
Date: Mon, 20 Nov 2023 17:49:22 +0000 (+0300)
Subject: Fix(Py3): `html.parser` cannot parse bytes
X-Git-Tag: 5.0.0~5
X-Git-Url: https://git.phdru.name/?p=bookmarks_db.git;a=commitdiff_plain;h=0478809ea1e8ae324e324e99cbb2e25bd0bf31b3

Fix(Py3): `html.parser` cannot parse bytes

Decode to unicode from a known encoding.
---

diff --git a/parse_html/bkmk_ph_htmlparser.py b/parse_html/bkmk_ph_htmlparser.py
index c0f89b4..d11a2ff 100644
--- a/parse_html/bkmk_ph_htmlparser.py
+++ b/parse_html/bkmk_ph_htmlparser.py
@@ -91,6 +91,11 @@ class HTMLParser(_HTMLParser):
 def parse_html(html_text, charset=None, log=None):
     if not html_text:
         return None
+    if charset is None and isinstance(html_text, bytes):
+        return None  # html.parser cannot parse bytes
+    if charset and isinstance(html_text, bytes):
+        html_text = html_text.decode(charset)
+
     parser = HTMLParser(charset)
 
     try: