"""
HTML Parsers wrapper
- Written by Broytman. Copyright (C) 1997-2010 PhiloSoft Design
+ Written by Broytman. Copyright (C) 1997-2011 PhiloSoft Design
"""
import codecs
else:
parsers.append(parse_html_beautifulsoup.parse_html)
+try:
+ from parse_html_lxml import parse_html
+except ImportError:
+ pass
+else:
+ parsers.append(parse_html)
+
try:
from parse_html_htmlparser import parse_html
except ImportError:
else:
parsers.append(parse_html_html5.parse_html)
+# ElementTidy often segfaults, so its parser registration below stays disabled.
+#try:
+# import parse_html_etreetidy
+#except ImportError:
+# pass
+#else:
+# parsers.append(parse_html_etreetidy.parse_html)
import re
from htmlentitydefs import name2codepoint
final_title = ' '.join([s for s in parts if s])
if log and (final_title <> converted_title): log(" final title : %s" % final_title)
parser.title = final_title
+
+ icon = parser.icon
+ if isinstance(icon, unicode):
+ try:
+ parser.icon = icon.encode('ascii')
+ except UnicodeEncodeError:
+ if parser.charset:
+ parser.icon = icon.encode(parser.charset)
return parser