From: Oleg Broytman Date: Sun, 13 Aug 2017 17:41:17 +0000 (+0300) Subject: Fix(parse_html): encode title to utf-8 as the last resort X-Git-Tag: 5.0.0~104 X-Git-Url: https://git.phdru.name/?p=bookmarks_db.git;a=commitdiff_plain;h=7fc9a9ac1bfa749aa30e3ae1d730ac4f266db950 Fix(parse_html): encode title to utf-8 as the last resort --- diff --git a/parse_html/bkmk_parse_html.py b/parse_html/bkmk_parse_html.py index fa72e1f..af9395b 100644 --- a/parse_html/bkmk_parse_html.py +++ b/parse_html/bkmk_parse_html.py @@ -152,8 +152,15 @@ def parse_html(html_text, charset=None, log=None): try: parser.title = title.encode('ascii') except UnicodeEncodeError: - parser.charset = DEFAULT_CHARSET - parser.title = title.encode(DEFAULT_CHARSET) + try: + parser.title = title.encode(DEFAULT_CHARSET) + except UnicodeEncodeError: + parser.title = title.encode(universal_charset) + parser.charset = universal_charset + else: + parser.charset = DEFAULT_CHARSET + else: + parser.charset = 'ascii' converted_title = title = parser.title if title and (not parser.charset):