__all__ = ['parse_html']
-from HTMLParser import HTMLParseError
+try:
+ from HTMLParser import HTMLParseError
+except ImportError:
+ class HTMLParseError(Exception): pass # placeholder so the name HTMLParseError stays defined when the HTMLParser module cannot be imported
from m_lib.net.www.html import HTMLParser as _HTMLParser
def parse_html(html_text, charset=None, log=None):
+ if not html_text:
+ return None
+ if charset is None and isinstance(html_text, bytes):
+ return None # html.parser cannot parse bytes
+ if charset and isinstance(html_text, bytes):
+ html_text = html_text.decode(charset)
+
parser = HTMLParser(charset)
try: