X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=parse_html%2Fbkmk_ph_htmlparser.py;h=d11a2ff9fbeab4b5e5ec8daa1c2a7b4205ac63e4;hb=3b51e43f06c97c0d1a2ffbb1c29276acaeb64f07;hp=b90618f1856d37a99bd78e7931421aa67e6d45b3;hpb=cb9c36b39ed72cd1fa272130d2bcf162a89c3013;p=bookmarks_db.git diff --git a/parse_html/bkmk_ph_htmlparser.py b/parse_html/bkmk_ph_htmlparser.py index b90618f..d11a2ff 100644 --- a/parse_html/bkmk_ph_htmlparser.py +++ b/parse_html/bkmk_ph_htmlparser.py @@ -11,7 +11,10 @@ __license__ = "GNU GPL" __all__ = ['parse_html'] -from HTMLParser import HTMLParseError +try: + from HTMLParser import HTMLParseError +except ImportError: + class HTMLParseError(Exception): pass from m_lib.net.www.html import HTMLParser as _HTMLParser @@ -86,6 +89,13 @@ class HTMLParser(_HTMLParser): def parse_html(html_text, charset=None, log=None): + if not html_text: + return None + if charset is None and isinstance(html_text, bytes): + return None # html.parser cannot parse bytes + if charset and isinstance(html_text, bytes): + html_text = html_text.decode(charset) + parser = HTMLParser(charset) try: