parsers = []
try:
- import parse_html_html5
+ import parse_html_beautifulsoup
+ parse_html_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
except ImportError:
pass
else:
- parsers.append(parse_html_html5.parse_html)
+ parsers.append(parse_html_beautifulsoup.parse_html)
try:
- import parse_html_beautifulsoup
- parse_html_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
+ from parse_html_htmlparser import parse_html
except ImportError:
pass
else:
- parsers.append(parse_html_beautifulsoup.parse_html)
+ parsers.append(parse_html)
-from parse_html_htmlparser import parse_html
-parsers.append(parse_html)
+try:
+ import parse_html_html5
+except ImportError:
+ pass
+else:
+ parsers.append(parse_html_html5.parse_html)
import re
def parse_html(filename, charset=None, log=None):
+ if not parsers:
+ return None
+
if charset:
try:
codecs.lookup(charset) # In case of unknown charset...
else:
if log: log("Parser %s.%s failed, trying next one." % (p.__module__, p.__name__))
+ if not parser:
+ return None
+
converted_title = title = parser.title
if title and (not parser.charset):
try: