X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fbkmk_robot_base.py;h=fd7237ae0184033797ef30a6a773fe9b200c10de;hb=ea440b7a93009308e8d87351725b600f1929b181;hp=f32e1b304d45aab959b047d51db4140241b90633;hpb=e69ee847224026d24684847d19d178f5b0db9309;p=bookmarks_db.git diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py index f32e1b3..fd7237a 100644 --- a/Robots/bkmk_robot_base.py +++ b/Robots/bkmk_robot_base.py @@ -90,7 +90,7 @@ class robot_base(Robot): try: size = headers["Content-Length"] except KeyError: - size = len(content) + pass try: last_modified = headers["Last-Modified"] @@ -99,7 +99,8 @@ class robot_base(Robot): if last_modified: last_modified = parse_time(last_modified) - else: + + if not size: # Could be None from headers size = len(content) if last_modified: @@ -126,7 +127,7 @@ class robot_base(Robot): if headers: try: content_type = headers["Content-Type"] - self.log(" Content-Type: %s" % content_type) + self.log(" Content-Type : %s" % content_type) if content_type is None: if 'html' in content.lower(): content_type = 'text/html' @@ -149,8 +150,14 @@ class robot_base(Robot): if content_type.startswith(ctype): is_html = True break - if content and is_html: - parser = parse_html(content, charset, self.log) + content_stripped = content.strip() + if content_stripped and is_html: + parser = parse_html( + content_stripped, charset, self.log) + if charset: + bookmark.charset = charset + elif parser and parser.meta_charset: + bookmark.charset = parser.meta_charset if parser: bookmark.real_title = parser.title icon = parser.icon @@ -246,7 +253,7 @@ class robot_base(Robot): % (url, timeout) ) - if not content: + if not content_stripped: self.log(" empty response, no content") if not is_html: self.log(" not html")