git.phdru.name Git - bookmarks_db.git/commitdiff
Fixed a bug - check if childNodes not empty.
author Oleg Broytman <phd@phdru.name>
Wed, 11 Aug 2010 20:44:03 +0000 (20:44 +0000)
committer Oleg Broytman <phd@phdru.name>
Wed, 11 Aug 2010 20:44:03 +0000 (20:44 +0000)
git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@265 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23

Robots/parse_html_html5.py

index 511e64022936acb6d0eb64a636e6600536e0f514..fcc7e158a20e2e0a6f35aa8840cbb197dd1c8755 100644 (file)
@@ -13,6 +13,9 @@ def parse_html(filename, charset=None, log=None):
     html_tree = HTML5Parser().parse(fp, charset)
     fp.close()
 
+    if not html_tree.childNodes:
+        return None
+
     html = html_tree.childNodes[-1]
     for node in html.childNodes:
         if node.name == 'head':
@@ -43,8 +46,11 @@ def parse_html(filename, charset=None, log=None):
 
         for node in head.childNodes:
             if node.name == 'title':
-                title = node.childNodes[0].value
-                break
+                if node.childNodes:
+                    title = node.childNodes[0].value
+                    break
+                else:
+                    title = ''
 
         if title and (charset or meta_charset):
             title = title.encode(charset or meta_charset)