From ea4316e0c474e17787d3a50fffb1d299e8604cc4 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Tue, 8 Jan 2008 11:20:40 +0000 Subject: [PATCH] If there is HEAD but no TITLE - return empty title. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@155 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- Robots/parse_html_beautifulsoup.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Robots/parse_html_beautifulsoup.py b/Robots/parse_html_beautifulsoup.py index d73e4c3..62ef277 100644 --- a/Robots/parse_html_beautifulsoup.py +++ b/Robots/parse_html_beautifulsoup.py @@ -58,19 +58,24 @@ def parse_html(filename, charset=None): finally: infile.close() - _charset = root.originalEncoding try: - title = root.html.head.title.string.encode(_charset) + head = root.html.head except AttributeError: return None - meta = root.html.head.find(_find_refresh, recursive=False) + _charset = root.originalEncoding + try: + title = head.title.string.encode(_charset) + except AttributeError: + return '' # HEAD but no TITLE + + meta = head.find(_find_refresh, recursive=False) if meta: refresh = meta.get("content") else: refresh = None - meta = root.html.head.find(_find_icon, recursive=False) + meta = head.find(_find_icon, recursive=False) if meta: icon = meta.get("href") else: -- 2.39.5