From: Oleg Broytman Date: Fri, 17 Nov 2023 23:55:10 +0000 (+0300) Subject: Fix(robots): Do not parse empty strings X-Git-Tag: 5.0.0~19 X-Git-Url: https://git.phdru.name/?p=bookmarks_db.git;a=commitdiff_plain;h=066f29ea81222a8a2ddd4ab1aff131d7fc1ec37f Fix(robots): Do not parse empty strings Some sites return empty "html" that consists only of whitespace. Strip it to get a truly empty string. --- diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py index d8877c6..52d6b56 100644 --- a/Robots/bkmk_robot_base.py +++ b/Robots/bkmk_robot_base.py @@ -149,8 +149,9 @@ class robot_base(Robot): if content_type.startswith(ctype): is_html = True break - if content and is_html: - parser = parse_html(content, charset, self.log) + content_stripped = content.strip() + if content_stripped and is_html: + parser = parse_html(content_stripped, charset, self.log) if parser: bookmark.real_title = parser.title icon = parser.icon @@ -246,7 +247,7 @@ class robot_base(Robot): % (url, timeout) ) - if not content: + if not content_stripped: self.log(" empty response, no content") if not is_html: self.log(" not html")