git.phdru.name Git - bookmarks_db.git/commitdiff
Test for completely broken HTML.
author Oleg Broytman <phd@phdru.name>
Fri, 13 Aug 2010 14:53:58 +0000 (14:53 +0000)
committer Oleg Broytman <phd@phdru.name>
Fri, 13 Aug 2010 14:53:58 +0000 (14:53 +0000)
git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@282 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23

Robots/parse_html_lxml.py

index 7aba09cbf5d426e2edf21c96fc4ae3375cef36ac..638af4e972cab376c1fa2f142494a8eea74b9cce 100644 (file)
@@ -11,6 +11,9 @@ from parse_html_util import HTMLParser
 def parse_html(filename, charset=None, log=None):
     html_tree = parse(filename)
 
+    if t.getroot() is None:
+        return None
+
     title = html_tree.findtext('head/title')
     if title is None:
         title = html_tree.findtext('title')