From 1a001aafb9fefdbc003ee5ce49ca19d4f4f7e3ec Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Fri, 13 Aug 2010 14:53:58 +0000 Subject: [PATCH] Test for completely broken HTML. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@282 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- Robots/parse_html_lxml.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Robots/parse_html_lxml.py b/Robots/parse_html_lxml.py index 7aba09c..638af4e 100644 --- a/Robots/parse_html_lxml.py +++ b/Robots/parse_html_lxml.py @@ -11,6 +11,9 @@ from parse_html_util import HTMLParser def parse_html(filename, charset=None, log=None): html_tree = parse(filename) + if t.getroot() is None: + return None + title = html_tree.findtext('head/title') if title is None: title = html_tree.findtext('title') -- 2.39.5