From: Oleg Broytman Date: Sat, 2 Mar 2024 13:28:46 +0000 (+0300) Subject: Fix(Robots/bkmk_robot_base): Ignore unknown charset X-Git-Tag: 5.2.2~5 X-Git-Url: https://git.phdru.name/?p=bookmarks_db.git;a=commitdiff_plain;h=a23c3efc90dd6a038d11f5892510a93e09593a70 Fix(Robots/bkmk_robot_base): Ignore unknown charset There are sites that provide an incorrect (most probably misspelled) charset. --- diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py index 7243918..1e511d0 100644 --- a/Robots/bkmk_robot_base.py +++ b/Robots/bkmk_robot_base.py @@ -135,8 +135,13 @@ class robot_base(Robot): break content_stripped = content.strip() if content_stripped and charset: - content_stripped = content_stripped.decode( - charset, 'replace') + try: + content_stripped = content_stripped.decode( + charset, 'replace') + except LookupError: + charset = None + self.log(" unknown charset " + "in Content-Type header") if content_stripped and is_html: parser = parse_html( content_stripped, charset, self.log)