git.phdru.name Git - bookmarks_db.git/commitdiff
Lookup title in html if not found in head.
author Oleg Broytman <phd@phdru.name>
Fri, 13 Aug 2010 13:06:20 +0000 (13:06 +0000)
committer Oleg Broytman <phd@phdru.name>
Fri, 13 Aug 2010 13:06:20 +0000 (13:06 +0000)
git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@277 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23

Robots/parse_html_html5.py

index 23020515aca8532d84ddbe3088f2d09f638c3c37..43e8d74ba957f658ffef276cadb2782ac27e84e5 100644 (file)
@@ -83,4 +83,16 @@ def parse_html(filename, charset=None, log=None):
                 icon = node.attributes['href']
                 break
 
+    else:
+        for node in html.childNodes:
+            if node.name == 'title':
+                if node.childNodes:
+                    title = node.childNodes[0].value
+                    break
+                else:
+                    title = ''
+
+        if title is None:
+            return None
+
     return HTMLParser(charset, meta_charset, title, refresh, icon)