git.phdru.name Git - bookmarks_db.git/commitdiff
Fix(Py3): `html.parser` cannot parse bytes
author Oleg Broytman <phd@phdru.name>
Mon, 20 Nov 2023 17:49:22 +0000 (20:49 +0300)
committer Oleg Broytman <phd@phdru.name>
Mon, 20 Nov 2023 19:16:09 +0000 (22:16 +0300)
Decode to unicode from a known encoding.

parse_html/bkmk_ph_htmlparser.py

index c0f89b411d688bc99ecaab1a377c80abb36b6891..d11a2ff9fbeab4b5e5ec8daa1c2a7b4205ac63e4 100644 (file)
@@ -91,6 +91,11 @@ class HTMLParser(_HTMLParser):
 def parse_html(html_text, charset=None, log=None):
     if not html_text:
         return None
+    if charset is None and isinstance(html_text, bytes):
+        return None  # html.parser cannot parse bytes
+    if charset and isinstance(html_text, bytes):
+        html_text = html_text.decode(charset)
+
     parser = HTMLParser(charset)
 
     try: