git.phdru.name Git - bookmarks_db.git/blobdiff - parse_html/bkmk_parse_html.py
Fix(parse_html): encode title to utf-8 as the last resort
[bookmarks_db.git] / parse_html / bkmk_parse_html.py
index fa72e1f2aa14f5296343434895bcf6ccb22e088d..af9395b0d8d735689288a02071a0e1aee80814de 100644 (file)
@@ -152,8 +152,15 @@ def parse_html(html_text, charset=None, log=None):
             try:
                 parser.title = title.encode('ascii')
             except UnicodeEncodeError:
-                parser.charset = DEFAULT_CHARSET
-                parser.title = title.encode(DEFAULT_CHARSET)
+                try:
+                    parser.title = title.encode(DEFAULT_CHARSET)
+                except UnicodeEncodeError:
+                    parser.title = title.encode(universal_charset)
+                    parser.charset = universal_charset
+                else:
+                    parser.charset = DEFAULT_CHARSET
+            else:
+                parser.charset = 'ascii'
 
     converted_title = title = parser.title
     if title and (not parser.charset):