git.phdru.name Git - bookmarks_db.git/blobdiff - Robots/parse_html_beautifulsoup.py
Look up the title in the root if it is not found in the head.
[bookmarks_db.git] / Robots / parse_html_beautifulsoup.py
index 8b26d635b763e3eb2b822cd1a871110d1fbb4824..1fa4dc358e723481cfa667767ffb9c6f49318b58 100644 (file)
@@ -77,16 +77,21 @@ def parse_html(filename, charset=None, log=None):
       # Lookup TITLE in the root
       title = root.title
 
-   if title is not None:
-      if title.string:
-         title = title.string.encode(_charset)
-      else:
-         parts = []
-         for part in title:
-            if not isinstance(part, basestring):
-               part = unicode(part)
-            parts.append(part.strip())
-         title = ''.join(parts).encode(_charset)
+   if title is None:
+      return None
+
+   if title.string:
+      title = title.string
+   else:
+      parts = []
+      for part in title:
+         if not isinstance(part, basestring):
+            part = unicode(part)
+         parts.append(part.strip())
+      title = ''.join(parts)
+
+   if _charset or meta_charset:
+      title = title.encode(_charset or meta_charset)
 
    meta = head.find(_find_contenttype, recursive=False)
    if meta: