From 066f29ea81222a8a2ddd4ab1aff131d7fc1ec37f Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sat, 18 Nov 2023 02:55:10 +0300 Subject: [PATCH] Fix(robots): Do not parse empty strings Some sites return empty "html" that consists only of whitespace. Strip it to get a truly empty string. --- Robots/bkmk_robot_base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py index d8877c6..52d6b56 100644 --- a/Robots/bkmk_robot_base.py +++ b/Robots/bkmk_robot_base.py @@ -149,8 +149,9 @@ class robot_base(Robot): if content_type.startswith(ctype): is_html = True break - if content and is_html: - parser = parse_html(content, charset, self.log) + content_stripped = content.strip() + if content_stripped and is_html: + parser = parse_html(content_stripped, charset, self.log) if parser: bookmark.real_title = parser.title icon = parser.icon @@ -246,7 +247,7 @@ class robot_base(Robot): % (url, timeout) ) - if not content: + if not content_stripped: self.log(" empty response, no content") if not is_html: self.log(" not html") -- 2.39.2