X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fbkmk_robot_base.py;h=52d6b563f1056c9ffe29a08299d59084d248be4e;hb=066f29ea81222a8a2ddd4ab1aff131d7fc1ec37f;hp=85c704a64955608c5add79747020d2b48ea74889;hpb=48440e20170112c8a036b0c66c1cbe067ed1d87e;p=bookmarks_db.git diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py index 85c704a..52d6b56 100644 --- a/Robots/bkmk_robot_base.py +++ b/Robots/bkmk_robot_base.py @@ -29,10 +29,11 @@ from parse_html import parse_html reloc_dict = { - 301: "perm.", + 301: "perm1.", 302: "temp2.", 303: "temp3.", 307: "temp7.", + 308: "temp8.", "html": "html" } @@ -125,7 +126,7 @@ class robot_base(Robot): if headers: try: content_type = headers["Content-Type"] - self.log(" Content-Type: %s" % content_type) + self.log(" Content-Type : %s" % content_type) if content_type is None: if 'html' in content.lower(): content_type = 'text/html' @@ -143,14 +144,14 @@ class robot_base(Robot): except (ValueError, IndexError): charset = None self.log(" no charset in Content-Type header") + is_html = False for ctype in ("text/html", "application/xhtml+xml"): if content_type.startswith(ctype): - html = True + is_html = True break - else: - html = False - if html: - parser = parse_html(content, charset, self.log) + content_stripped = content.strip() + if content_stripped and is_html: + parser = parse_html(content_stripped, charset, self.log) if parser: bookmark.real_title = parser.title icon = parser.icon @@ -246,6 +247,10 @@ class robot_base(Robot): % (url, timeout) ) + if not content_stripped: + self.log(" empty response, no content") + if not is_html: + self.log(" not html") except KeyError as key: self.log(" no header: %s" % key)