From 3dcc828780e34c685703def5278607cfd283fb72 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sat, 18 Nov 2023 02:54:46 +0300 Subject: [PATCH] Fix(parse_html): Do not parse empty strings --- parse_html/bkmk_ph_beautifulsoup.py | 2 ++ parse_html/bkmk_ph_beautifulsoup4.py | 2 ++ parse_html/bkmk_ph_etreetidy.py | 2 ++ parse_html/bkmk_ph_html5.py | 2 ++ parse_html/bkmk_ph_htmlparser.py | 2 ++ 5 files changed, 10 insertions(+) diff --git a/parse_html/bkmk_ph_beautifulsoup.py b/parse_html/bkmk_ph_beautifulsoup.py index ac880cc..0aad3dd 100644 --- a/parse_html/bkmk_ph_beautifulsoup.py +++ b/parse_html/bkmk_ph_beautifulsoup.py @@ -62,6 +62,8 @@ def _parse_html(html_text, charset): def parse_html(html_text, charset=None, log=None): + if not html_text: + return None root = _parse_html(html_text, charset) if root is None: return None diff --git a/parse_html/bkmk_ph_beautifulsoup4.py b/parse_html/bkmk_ph_beautifulsoup4.py index 6549683..faafca2 100644 --- a/parse_html/bkmk_ph_beautifulsoup4.py +++ b/parse_html/bkmk_ph_beautifulsoup4.py @@ -39,6 +39,8 @@ def _parse_html(html_text, charset): def parse_html(html_text, charset=None, log=None): + if not html_text: + return None root = _parse_html(html_text, charset) if root is None: return None diff --git a/parse_html/bkmk_ph_etreetidy.py b/parse_html/bkmk_ph_etreetidy.py index 95f2071..eadcca3 100644 --- a/parse_html/bkmk_ph_etreetidy.py +++ b/parse_html/bkmk_ph_etreetidy.py @@ -16,6 +16,8 @@ from .bkmk_ph_util import HTMLParser def parse_html(html_text, charset=None, log=None): + if not html_text: + return None try: html_tree = TidyHTMLTreeBuilder.parseString(html_text) except: diff --git a/parse_html/bkmk_ph_html5.py b/parse_html/bkmk_ph_html5.py index 1fabd82..d973b72 100644 --- a/parse_html/bkmk_ph_html5.py +++ b/parse_html/bkmk_ph_html5.py @@ -16,6 +16,8 @@ from .bkmk_ph_util import HTMLParser def parse_html(html_text, charset=None, log=None): + if not html_text: + return None parser = HTML5Parser() if isinstance(html_text, bytes): html_tree = parser.parse( diff --git a/parse_html/bkmk_ph_htmlparser.py b/parse_html/bkmk_ph_htmlparser.py index fd7b687..c0f89b4 100644 --- a/parse_html/bkmk_ph_htmlparser.py +++ b/parse_html/bkmk_ph_htmlparser.py @@ -89,6 +89,8 @@ class HTMLParser(_HTMLParser): def parse_html(html_text, charset=None, log=None): + if not html_text: + return None parser = HTMLParser(charset) try: -- 2.39.2