From: Oleg Broytman Date: Fri, 17 Nov 2023 23:54:46 +0000 (+0300) Subject: Fix(parse_html): Do not parse empty strings X-Git-Tag: 5.0.0~20 X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=3dcc828780e34c685703def5278607cfd283fb72;p=bookmarks_db.git Fix(parse_html): Do not parse empty strings --- diff --git a/parse_html/bkmk_ph_beautifulsoup.py b/parse_html/bkmk_ph_beautifulsoup.py index ac880cc..0aad3dd 100644 --- a/parse_html/bkmk_ph_beautifulsoup.py +++ b/parse_html/bkmk_ph_beautifulsoup.py @@ -62,6 +62,8 @@ def _parse_html(html_text, charset): def parse_html(html_text, charset=None, log=None): + if not html_text: + return None root = _parse_html(html_text, charset) if root is None: return None diff --git a/parse_html/bkmk_ph_beautifulsoup4.py b/parse_html/bkmk_ph_beautifulsoup4.py index 6549683..faafca2 100644 --- a/parse_html/bkmk_ph_beautifulsoup4.py +++ b/parse_html/bkmk_ph_beautifulsoup4.py @@ -39,6 +39,8 @@ def _parse_html(html_text, charset): def parse_html(html_text, charset=None, log=None): + if not html_text: + return None root = _parse_html(html_text, charset) if root is None: return None diff --git a/parse_html/bkmk_ph_etreetidy.py b/parse_html/bkmk_ph_etreetidy.py index 95f2071..eadcca3 100644 --- a/parse_html/bkmk_ph_etreetidy.py +++ b/parse_html/bkmk_ph_etreetidy.py @@ -16,6 +16,8 @@ from .bkmk_ph_util import HTMLParser def parse_html(html_text, charset=None, log=None): + if not html_text: + return None try: html_tree = TidyHTMLTreeBuilder.parseString(html_text) except: diff --git a/parse_html/bkmk_ph_html5.py b/parse_html/bkmk_ph_html5.py index 1fabd82..d973b72 100644 --- a/parse_html/bkmk_ph_html5.py +++ b/parse_html/bkmk_ph_html5.py @@ -16,6 +16,8 @@ from .bkmk_ph_util import HTMLParser def parse_html(html_text, charset=None, log=None): + if not html_text: + return None parser = HTML5Parser() if isinstance(html_text, bytes): html_tree = parser.parse( diff --git a/parse_html/bkmk_ph_htmlparser.py b/parse_html/bkmk_ph_htmlparser.py index fd7b687..c0f89b4 100644 --- a/parse_html/bkmk_ph_htmlparser.py +++ b/parse_html/bkmk_ph_htmlparser.py @@ -89,6 +89,8 @@ class HTMLParser(_HTMLParser): def parse_html(html_text, charset=None, log=None): + if not html_text: + return None parser = HTMLParser(charset) try: