-#! /usr/bin/env python
"""
HTML Parsers wrapper
parsers = []
try:
- import parse_html_beautifulsoup
- parse_html_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
+ from . import beautifulsoup
except ImportError:
pass
else:
- parsers.append(parse_html_beautifulsoup.parse_html)
+ beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
+ parsers.append(beautifulsoup.parse_html)
try:
- from parse_html_lxml import parse_html
+ from .lxml import parse_html
except ImportError:
pass
else:
parsers.append(parse_html)
try:
- from parse_html_htmlparser import parse_html
+ from .htmlparser import parse_html
except ImportError:
pass
else:
parsers.append(parse_html)
try:
- import parse_html_html5
+ from . import html5
except ImportError:
pass
else:
- parsers.append(parse_html_html5.parse_html)
+ parsers.append(html5.parse_html)
# ElementTidy often segfaults
#try:
-# import parse_html_etreetidy
+# from . import etreetidy
#except ImportError:
# pass
#else:
-# parsers.append(parse_html_etreetidy.parse_html)
+# parsers.append(etreetidy.parse_html)
import re
from htmlentitydefs import name2codepoint
for c in charsets:
try:
parser = p(filename, c, log)
- break
except UnicodeEncodeError:
pass
+ else:
+ break
if parser:
break
else:
return parser
-if __name__ == '__main__':
+def test():
import sys
l = len(sys.argv)