From 7f4662c68c41b4dfff5b134d3762591acecba656 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Tue, 8 Jan 2008 15:47:01 +0000 Subject: [PATCH] Some sites put TITLE in HTML without HEAD. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@157 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- Robots/parse_html_beautifulsoup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Robots/parse_html_beautifulsoup.py b/Robots/parse_html_beautifulsoup.py index 730e263..209486c 100644 --- a/Robots/parse_html_beautifulsoup.py +++ b/Robots/parse_html_beautifulsoup.py @@ -63,6 +63,9 @@ def parse_html(filename, charset=None): except AttributeError: return None + if head is None: + head = root.html # Some sites put TITLE in HTML without HEAD + _charset = root.originalEncoding try: title = head.title.string.encode(_charset) -- 2.39.5