Style: Fix `flake8` E501 line too long

[bookmarks_db.git] / Robots / bkmk_robot_base.py
diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py

index 85c704a64955608c5add79747020d2b48ea74889..fd7237ae0184033797ef30a6a773fe9b200c10de 100644 (file)
--- a/Robots/bkmk_robot_base.py
+++ b/Robots/bkmk_robot_base.py
@@ -29,10 +29,11 @@ from parse_html import parse_html
  
  
  reloc_dict = {
-  301: "perm.",
+  301: "perm1.",
    302: "temp2.",
    303: "temp3.",
    307: "temp7.",
+  308: "temp8.",
    "html": "html"
  }
  
@@ -89,7 +90,7 @@ class robot_base(Robot):
                  try:
                      size = headers["Content-Length"]
                  except KeyError:
-                    size = len(content)
+                    pass
  
                  try:
                      last_modified = headers["Last-Modified"]
@@ -98,7 +99,8 @@ class robot_base(Robot):
  
                  if last_modified:
                      last_modified = parse_time(last_modified)
-            else:
+
+            if not size:  # Could be None from headers
                  size = len(content)
  
              if last_modified:
@@ -125,7 +127,7 @@ class robot_base(Robot):
              if headers:
                  try:
                      content_type = headers["Content-Type"]
-                    self.log("   Content-Type: %s" % content_type)
+                    self.log("   Content-Type   : %s" % content_type)
                      if content_type is None:
                          if 'html' in content.lower():
                              content_type = 'text/html'
@@ -143,14 +145,19 @@ class robot_base(Robot):
                      except (ValueError, IndexError):
                          charset = None
                          self.log("   no charset in Content-Type header")
+                    is_html = False
                      for ctype in ("text/html", "application/xhtml+xml"):
                          if content_type.startswith(ctype):
-                            html = True
+                            is_html = True
                              break
-                    else:
-                        html = False
-                    if html:
-                        parser = parse_html(content, charset, self.log)
+                    content_stripped = content.strip()
+                    if content_stripped and is_html:
+                        parser = parse_html(
+                            content_stripped, charset, self.log)
+                        if charset:
+                            bookmark.charset = charset
+                        elif parser and parser.meta_charset:
+                            bookmark.charset = parser.meta_charset
                          if parser:
                              bookmark.real_title = parser.title
                              icon = parser.icon
@@ -246,6 +253,10 @@ class robot_base(Robot):
                                                    % (url, timeout)
                                                    )
  
+                    if not content_stripped:
+                        self.log("   empty response, no content")
+                    if not is_html:
+                        self.log("   not html")
                  except KeyError as key:
                      self.log("   no header: %s" % key)