git.phdru.name Git - bookmarks_db.git/commitdiff
Split the title into subparts, reassemble the subparts and recode.
author Oleg Broytman <phd@phdru.name>
Fri, 7 Mar 2008 20:56:06 +0000 (20:56 +0000)
committer Oleg Broytman <phd@phdru.name>
Fri, 7 Mar 2008 20:56:06 +0000 (20:56 +0000)
git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@208 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23

Robots/parse_html_beautifulsoup.py

index 8f82d249669df7a7d79cc0a203103596f4284936..db291f9fd46df8607709333dbaa304eaba00007e 100644 (file)
@@ -92,7 +92,12 @@ def parse_html(filename, charset=None, log=None):
    elif title.string:
       title = title.string.encode(_charset)
    else:
-      title = str(title)
+      parts = []
+      for part in title:
+         if not isinstance(part, basestring):
+            part = unicode(part)
+         parts.append(part.strip())
+      title = ''.join(parts).encode(_charset)
 
    meta = head.find(_find_contenttype, recursive=False)
    if meta: