git.phdru.name Git - bookmarks_db.git/commitdiff
Fix(Robots/bkmk_robot_base): Ignore unknown charset
author Oleg Broytman <phd@phdru.name>
Sat, 2 Mar 2024 13:28:46 +0000 (16:28 +0300)
committer Oleg Broytman <phd@phdru.name>
Sat, 2 Mar 2024 13:28:46 +0000 (16:28 +0300)
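There are sites that provide an incorrect
(most probably misspelled) charset.
Decoding with such a charset raises LookupError; catch it,
reset the charset to None and log the problem.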

Robots/bkmk_robot_base.py

index 724391853f57f4d7912f579e261d403de35ccb47..1e511d0b48a625e752f337431892ff7872c050bf 100644 (file)
@@ -135,8 +135,13 @@ class robot_base(Robot):
                             break
                     content_stripped = content.strip()
                     if content_stripped and charset:
-                        content_stripped = content_stripped.decode(
-                            charset, 'replace')
+                        try:
+                            content_stripped = content_stripped.decode(
+                                charset, 'replace')
+                        except LookupError:
+                            charset = None
+                            self.log("   unknown charset "
+                                     "in Content-Type header")
                     if content_stripped and is_html:
                         parser = parse_html(
                             content_stripped, charset, self.log)