From 99128bb510fe0114d77dd8365997eff53a6ab8de Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Wed, 11 Aug 2010 20:44:03 +0000 Subject: [PATCH] Fixed a bug - check if childNodes not empty. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@265 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- Robots/parse_html_html5.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Robots/parse_html_html5.py b/Robots/parse_html_html5.py index 511e640..fcc7e15 100644 --- a/Robots/parse_html_html5.py +++ b/Robots/parse_html_html5.py @@ -13,6 +13,9 @@ def parse_html(filename, charset=None, log=None): html_tree = HTML5Parser().parse(fp, charset) fp.close() + if not html_tree.childNodes: + return None + html = html_tree.childNodes[-1] for node in html.childNodes: if node.name == 'head': @@ -43,8 +46,11 @@ def parse_html(filename, charset=None, log=None): for node in head.childNodes: if node.name == 'title': - title = node.childNodes[0].value - break + if node.childNodes: + title = node.childNodes[0].value + break + else: + title = '' if title and (charset or meta_charset): title = title.encode(charset or meta_charset) -- 2.39.2