From: Oleg Broytman
Date: Sat, 23 Feb 2008 22:34:24 +0000 (+0000)
Subject: Try a list of charsets, including the universal (utf-8) and the default ones.
X-Git-Tag: v4.5.3~198
X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=83354a5dc4b5b3fd4e2c3e8a1b43487254b4bcde;p=bookmarks_db.git

Try a list of charsets, including the universal (utf-8) and the default ones.

git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@178 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23
---

diff --git a/Robots/parse_html.py b/Robots/parse_html.py
index c7acb64..6fe1df9 100755
--- a/Robots/parse_html.py
+++ b/Robots/parse_html.py
@@ -56,7 +56,15 @@ def parse_html(filename, charset=None, log=None):
          charset = None # ...try charset from HTML
 
    for p in parsers:
-      parser = p(filename, charset)
+      charsets = [universal_charset, DEFAULT_CHARSET]
+      if charset not in charsets:
+         charsets.insert(0, charset)
+      parser = None
+      for c in charsets:
+         try:
+            parser = p(filename, c)
+         except UnicodeEncodeError:
+            pass
       if parser:
          break
       else: