X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fparse_html.py;h=a83585df318afb1602a916a1a17768485a099138;hb=8ce74f839238093d7278aa041ff55dbcb7abd3a0;hp=bc9a8d4950fc2ae430a111c423c78c3a5c5755b9;hpb=f8c96fe94544f486e9e76640d16fc355daa4db70;p=bookmarks_db.git diff --git a/Robots/parse_html.py b/Robots/parse_html.py index bc9a8d4..a83585d 100755 --- a/Robots/parse_html.py +++ b/Robots/parse_html.py @@ -20,6 +20,13 @@ except ImportError: else: parsers.append(parse_html_beautifulsoup.parse_html) +try: + from parse_html_lxml import parse_html +except ImportError: + pass +else: + parsers.append(parse_html) + try: from parse_html_htmlparser import parse_html except ImportError: @@ -136,6 +143,14 @@ def parse_html(filename, charset=None, log=None): final_title = ' '.join([s for s in parts if s]) if log and (final_title <> converted_title): log(" final title : %s" % final_title) parser.title = final_title + + icon = parser.icon + if isinstance(icon, unicode): + try: + parser.icon = icon.encode('ascii') + except UnicodeEncodeError: + if parser.charset: + parser.icon = icon.encode(parser.charset) return parser