X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fparse_html_beautifulsoup.py;h=d3778fbbfc55d529aa03f88c7d094c7bc22e07d3;hb=1a001aafb9fefdbc003ee5ce49ca19d4f4f7e3ec;hp=8b26d635b763e3eb2b822cd1a871110d1fbb4824;hpb=163b0e2db2d743501f80ac404007d421b868fece;p=bookmarks_db.git

diff --git a/Robots/parse_html_beautifulsoup.py b/Robots/parse_html_beautifulsoup.py
index 8b26d63..d3778fb 100644
--- a/Robots/parse_html_beautifulsoup.py
+++ b/Robots/parse_html_beautifulsoup.py
@@ -77,16 +77,18 @@ def parse_html(filename, charset=None, log=None):
       # Lookup TITLE in the root
       title = root.title
 
-   if title is not None:
-      if title.string:
-         title = title.string.encode(_charset)
-      else:
-         parts = []
-         for part in title:
-            if not isinstance(part, basestring):
-               part = unicode(part)
-            parts.append(part.strip())
-         title = ''.join(parts).encode(_charset)
+   if title is None:
+      return None
+
+   if title.string:
+      title = title.string
+   else:
+      parts = []
+      for part in title:
+         if not isinstance(part, basestring):
+            part = unicode(part)
+         parts.append(part.strip())
+      title = ''.join(parts)
 
    meta = head.find(_find_contenttype, recursive=False)
    if meta:
@@ -103,6 +105,9 @@ def parse_html(filename, charset=None, log=None):
    else:
       meta_charset = False
 
+   if charset or meta_charset:
+      title = title.encode(charset or meta_charset)
+
    meta = head.find(_find_refresh, recursive=False)
    if meta:
       refresh = meta.get("content")