X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=parse_html%2Fbkmk_ph_html5.py;h=d973b729976e59f4225c09de66c1fdfdf689e9f8;hb=15632047d4fb59cc40ef98dcb4a94438ba16f9d4;hp=111e1ed4b4b38360fef84ca41a9276351d0e6c5e;hpb=cb9c36b39ed72cd1fa272130d2bcf162a89c3013;p=bookmarks_db.git diff --git a/parse_html/bkmk_ph_html5.py b/parse_html/bkmk_ph_html5.py index 111e1ed..d973b72 100644 --- a/parse_html/bkmk_ph_html5.py +++ b/parse_html/bkmk_ph_html5.py @@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2010-2014 PhiloSoft Design" +__copyright__ = "Copyright (C) 2010-2023 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['parse_html'] @@ -16,9 +16,14 @@ from .bkmk_ph_util import HTMLParser def parse_html(html_text, charset=None, log=None): + if not html_text: + return None parser = HTML5Parser() - html_tree = parser.parse( - html_text, encoding=charset, parseMeta=bool(charset)) + if isinstance(html_text, bytes): + html_tree = parser.parse( + html_text, encoding=charset, parseMeta=bool(charset)) + else: + html_tree = parser.parse(html_text) html = None if hasattr(html_tree, 'childNodes'): @@ -73,8 +78,8 @@ def parse_html(html_text, charset=None, log=None): if not charset: charset = parser.tokenizer.stream.charEncoding[0] - if title and (charset or meta_charset): - title = title.encode(charset or meta_charset) + #if title and (charset or meta_charset): + # title = title.encode(charset or meta_charset) for node in head.childNodes: if node.name == 'meta' and \