git.phdru.name Git - bookmarks_db.git/blobdiff - Robots/bkmk_robot_base.py
Fix(robots): Fix "Content-Length" header returning `None`
[bookmarks_db.git] / Robots / bkmk_robot_base.py
index f32e1b304d45aab959b047d51db4140241b90633..32f8c68dc695ff3c3e651c067ff0a09574f9f617 100644 (file)
@@ -90,7 +90,7 @@ class robot_base(Robot):
                 try:
                     size = headers["Content-Length"]
                 except KeyError:
-                    size = len(content)
+                    pass
 
                 try:
                     last_modified = headers["Last-Modified"]
@@ -99,7 +99,8 @@ class robot_base(Robot):
 
                 if last_modified:
                     last_modified = parse_time(last_modified)
-            else:
+
+            if not size:  # Could be None from headers
                 size = len(content)
 
             if last_modified:
@@ -126,7 +127,7 @@ class robot_base(Robot):
             if headers:
                 try:
                     content_type = headers["Content-Type"]
-                    self.log("   Content-Type: %s" % content_type)
+                    self.log("   Content-Type   : %s" % content_type)
                     if content_type is None:
                         if 'html' in content.lower():
                             content_type = 'text/html'
@@ -149,8 +150,13 @@ class robot_base(Robot):
                         if content_type.startswith(ctype):
                             is_html = True
                             break
-                    if content and is_html:
-                        parser = parse_html(content, charset, self.log)
+                    content_stripped = content.strip()
+                    if content_stripped and is_html:
+                        parser = parse_html(content_stripped, charset, self.log)
+                        if charset:
+                            bookmark.charset = charset
+                        elif parser and parser.meta_charset:
+                            bookmark.charset = parser.meta_charset
                         if parser:
                             bookmark.real_title = parser.title
                             icon = parser.icon
@@ -246,7 +252,7 @@ class robot_base(Robot):
                                                   % (url, timeout)
                                                   )
 
-                    if not content:
+                    if not content_stripped:
                         self.log("   empty response, no content")
                     if not is_html:
                         self.log("   not html")