From: Oleg Broytman
Date: Sun, 16 Dec 2007 19:55:26 +0000 (+0000)
Subject: Calculate if the charset came from HTTP or from HTML meta.
X-Git-Tag: v4.5.3~268
X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=efa27458378a9a253f0002fd060e2f8a2ae5a25f;p=bookmarks_db.git

Calculate if the charset came from HTTP or from HTML meta.

git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@108 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23
---

diff --git a/Robots/parse_html_beautifulsoup.py b/Robots/parse_html_beautifulsoup.py
index e0129fd..130b20a 100644
--- a/Robots/parse_html_beautifulsoup.py
+++ b/Robots/parse_html_beautifulsoup.py
@@ -23,9 +23,9 @@ def parse_html(filename, charset=None):
    root = BeautifulSoup(infile, fromEncoding=charset)
    infile.close()
 
-   charset = root.originalEncoding
+   _charset = root.originalEncoding
    try:
-      title = root.html.head.title.string.encode(charset)
+      title = root.html.head.title.string.encode(_charset)
    except AttributeError:
       title = ''
 
@@ -49,7 +49,7 @@ def parse_html(filename, charset=None):
    else:
       icon = None
 
-   parser = BSoupParser(charset, False, title, refresh, icon)
+   parser = BSoupParser(_charset, _charset == charset, title, refresh, icon)
    return parser
 
 def _find_refresh(Tag):