"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html']
def parse_html(html_text, charset=None, log=None):
+ if not html_text:
+ return None
parser = HTML5Parser()
- html_tree = parser.parse(
- html_text, encoding=charset, parseMeta=bool(charset))
+ if isinstance(html_text, bytes):
+ html_tree = parser.parse(
+ html_text, encoding=charset, parseMeta=bool(charset))
+ else:
+ html_tree = parser.parse(html_text)
html = None
if hasattr(html_tree, 'childNodes'):
if not charset:
charset = parser.tokenizer.stream.charEncoding[0]
- if title and (charset or meta_charset):
- title = title.encode(charset or meta_charset)
+ #if title and (charset or meta_charset):
+ # title = title.encode(charset or meta_charset)
for node in head.childNodes:
if node.name == 'meta' and \