From: Oleg Broytman <phd@phdru.name>
Date: Fri, 17 Nov 2023 23:55:10 +0000 (+0300)
Subject: Fix(robots): Do not parse empty strings
X-Git-Tag: 5.0.0~19
X-Git-Url: https://git.phdru.name/?p=bookmarks_db.git;a=commitdiff_plain;h=066f29ea81222a8a2ddd4ab1aff131d7fc1ec37f

Fix(robots): Do not parse empty strings

Some sites return empty "html" that consists only of whitespace.
Strip the content so that such a response becomes a truly empty string.
---

diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py
index d8877c6..52d6b56 100644
--- a/Robots/bkmk_robot_base.py
+++ b/Robots/bkmk_robot_base.py
@@ -149,8 +149,9 @@ class robot_base(Robot):
                         if content_type.startswith(ctype):
                             is_html = True
                             break
-                    if content and is_html:
-                        parser = parse_html(content, charset, self.log)
+                    content_stripped = content.strip()
+                    if content_stripped and is_html:
+                        parser = parse_html(content_stripped, charset, self.log)
                         if parser:
                             bookmark.real_title = parser.title
                             icon = parser.icon
@@ -246,7 +247,7 @@ class robot_base(Robot):
                                                   % (url, timeout)
                                                   )
 
-                    if not content:
+                    if not content_stripped:
                         self.log("   empty response, no content")
                     if not is_html:
                         self.log("   not html")