X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=parse_html%2Fbkmk_ph_htmlparser.py;h=d11a2ff9fbeab4b5e5ec8daa1c2a7b4205ac63e4;hb=d454f1d6aa7f2430d502d847693515f69489c66c;hp=b90618f1856d37a99bd78e7931421aa67e6d45b3;hpb=cb9c36b39ed72cd1fa272130d2bcf162a89c3013;p=bookmarks_db.git diff --git a/parse_html/bkmk_ph_htmlparser.py b/parse_html/bkmk_ph_htmlparser.py index b90618f..d11a2ff 100644 --- a/parse_html/bkmk_ph_htmlparser.py +++ b/parse_html/bkmk_ph_htmlparser.py @@ -11,7 +11,10 @@ __license__ = "GNU GPL" __all__ = ['parse_html'] -from HTMLParser import HTMLParseError +try: + from HTMLParser import HTMLParseError +except ImportError: + class HTMLParseError(Exception): pass from m_lib.net.www.html import HTMLParser as _HTMLParser @@ -86,6 +89,13 @@ class HTMLParser(_HTMLParser): def parse_html(html_text, charset=None, log=None): + if not html_text: + return None + if charset is None and isinstance(html_text, bytes): + return None # html.parser cannot parse bytes + if charset and isinstance(html_text, bytes): + html_text = html_text.decode(charset) + parser = HTMLParser(charset) try: