X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fbkmk_robot_base.py;h=fd7237ae0184033797ef30a6a773fe9b200c10de;hb=ea440b7a93009308e8d87351725b600f1929b181;hp=d8877c6f41c6b6f30c8d8ba4b8dbcec396e6c02b;hpb=8c04e58972d1c58ab82250df093c3d503eed4fe2;p=bookmarks_db.git diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py index d8877c6..fd7237a 100644 --- a/Robots/bkmk_robot_base.py +++ b/Robots/bkmk_robot_base.py @@ -90,7 +90,7 @@ class robot_base(Robot): try: size = headers["Content-Length"] except KeyError: - size = len(content) + pass try: last_modified = headers["Last-Modified"] @@ -99,7 +99,8 @@ class robot_base(Robot): if last_modified: last_modified = parse_time(last_modified) - else: + + if not size: # Could be None from headers size = len(content) if last_modified: @@ -149,8 +150,14 @@ class robot_base(Robot): if content_type.startswith(ctype): is_html = True break - if content and is_html: - parser = parse_html(content, charset, self.log) + content_stripped = content.strip() + if content_stripped and is_html: + parser = parse_html( + content_stripped, charset, self.log) + if charset: + bookmark.charset = charset + elif parser and parser.meta_charset: + bookmark.charset = parser.meta_charset if parser: bookmark.real_title = parser.title icon = parser.icon @@ -246,7 +253,7 @@ class robot_base(Robot): % (url, timeout) ) - if not content: + if not content_stripped: self.log(" empty response, no content") if not is_html: self.log(" not html")