From: Oleg Broytman Date: Fri, 13 Aug 2010 13:06:20 +0000 (+0000) Subject: Lookup title in html if not found in head. X-Git-Tag: v4.5.3~99 X-Git-Url: https://git.phdru.name/?p=bookmarks_db.git;a=commitdiff_plain;h=331fbf809d7bd3cc14faeb7b3a72b25bc54f6dfb Lookup title in html if not found in head. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@277 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- diff --git a/Robots/parse_html_html5.py b/Robots/parse_html_html5.py index 2302051..43e8d74 100644 --- a/Robots/parse_html_html5.py +++ b/Robots/parse_html_html5.py @@ -83,4 +83,16 @@ def parse_html(filename, charset=None, log=None): icon = node.attributes['href'] break + else: + for node in html.childNodes: + if node.name == 'title': + if node.childNodes: + title = node.childNodes[0].value + break + else: + title = '' + + if title is None: + return None + return HTMLParser(charset, meta_charset, title, refresh, icon)