git.phdru.name Git - bookmarks_db.git/blobdiff - Robots/parse_html_beautifulsoup.py
Split the title into subparts, reassemble the subparts and recode.
[bookmarks_db.git] / Robots / parse_html_beautifulsoup.py
index 8f82d249669df7a7d79cc0a203103596f4284936..db291f9fd46df8607709333dbaa304eaba00007e 100644 (file)
@@ -92,7 +92,12 @@ def parse_html(filename, charset=None, log=None):
    elif title.string:
       title = title.string.encode(_charset)
    else:
-      title = str(title)
+      parts = []
+      for part in title:
+         if not isinstance(part, basestring):
+            part = unicode(part)
+         parts.append(part.strip())
+      title = ''.join(parts).encode(_charset)
 
    meta = head.find(_find_contenttype, recursive=False)
    if meta: