From: Oleg Broytman
Date: Fri, 13 Aug 2010 14:53:58 +0000 (+0000)
Subject: Test for completely broken HTML.
X-Git-Tag: v4.5.3~94
X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=1a001aafb9fefdbc003ee5ce49ca19d4f4f7e3ec;p=bookmarks_db.git

Test for completely broken HTML.

git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@282 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23
---

diff --git a/Robots/parse_html_lxml.py b/Robots/parse_html_lxml.py
index 7aba09c..638af4e 100644
--- a/Robots/parse_html_lxml.py
+++ b/Robots/parse_html_lxml.py
@@ -11,6 +11,9 @@ from parse_html_util import HTMLParser
 def parse_html(filename, charset=None, log=None):
     html_tree = parse(filename)
 
+    if t.getroot() is None:
+        return None
+
     title = html_tree.findtext('head/title')
     if title is None:
         title = html_tree.findtext('title')