git.phdru.name Git - bookmarks_db.git/commitdiff
Fix(parse_html): encode title to utf-8 as the last resort
author Oleg Broytman <phd@phdru.name>
Sun, 13 Aug 2017 17:41:17 +0000 (20:41 +0300)
committer Oleg Broytman <phd@phdru.name>
Sun, 13 Aug 2017 17:41:17 +0000 (20:41 +0300)
parse_html/bkmk_parse_html.py

index fa72e1f2aa14f5296343434895bcf6ccb22e088d..af9395b0d8d735689288a02071a0e1aee80814de 100644 (file)
@@ -152,8 +152,15 @@ def parse_html(html_text, charset=None, log=None):
             try:
                 parser.title = title.encode('ascii')
             except UnicodeEncodeError:
-                parser.charset = DEFAULT_CHARSET
-                parser.title = title.encode(DEFAULT_CHARSET)
+                try:
+                    parser.title = title.encode(DEFAULT_CHARSET)
+                except UnicodeEncodeError:
+                    parser.title = title.encode(universal_charset)
+                    parser.charset = universal_charset
+                else:
+                    parser.charset = DEFAULT_CHARSET
+            else:
+                parser.charset = 'ascii'
 
     converted_title = title = parser.title
     if title and (not parser.charset):