From a23c3efc90dd6a038d11f5892510a93e09593a70 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sat, 2 Mar 2024 16:28:46 +0300 Subject: [PATCH] Fix(Robots/bkmk_robot_base): Ignore unknown charset There are sites that provide an incorrect (most probably misspelled) charset. --- Robots/bkmk_robot_base.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py index 7243918..1e511d0 100644 --- a/Robots/bkmk_robot_base.py +++ b/Robots/bkmk_robot_base.py @@ -135,8 +135,13 @@ class robot_base(Robot): break content_stripped = content.strip() if content_stripped and charset: - content_stripped = content_stripped.decode( - charset, 'replace') + try: + content_stripped = content_stripped.decode( + charset, 'replace') + except LookupError: + charset = None + self.log(" unknown charset " + "in Content-Type header") if content_stripped and is_html: parser = parse_html( content_stripped, charset, self.log) -- 2.39.2