X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fparse_html_lxml.py;h=8f658e1a026d9d16789f69bf43c5cf8a8d45fd83;hb=a788e94901728a5ea127b2c09e3a13ff6a5447b9;hp=7aba09cbf5d426e2edf21c96fc4ae3375cef36ac;hpb=b747da40daf40c8b32a437758d7b4244d752838a;p=bookmarks_db.git diff --git a/Robots/parse_html_lxml.py b/Robots/parse_html_lxml.py index 7aba09c..8f658e1 100644 --- a/Robots/parse_html_lxml.py +++ b/Robots/parse_html_lxml.py @@ -11,6 +11,9 @@ from parse_html_util import HTMLParser def parse_html(filename, charset=None, log=None): html_tree = parse(filename) + if html_tree.getroot() is None: + return None + title = html_tree.findtext('head/title') if title is None: title = html_tree.findtext('title')