From: Oleg Broytman
Date: Wed, 13 Feb 2008 18:17:53 +0000 (+0000)
Subject: Replace BeautifulSoup's guessed cp1252 with DEFAULT_CHARSET.
X-Git-Tag: v4.5.3~200
X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=3bbe3e07c526ef31e1e0901c05e077ad406c9300;p=bookmarks_db.git

Replace BeautifulSoup's guessed cp1252 with DEFAULT_CHARSET.

git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@176 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23
---

diff --git a/Robots/parse_html_beautifulsoup.py b/Robots/parse_html_beautifulsoup.py
index 9b0fade..c7263fe 100644
--- a/Robots/parse_html_beautifulsoup.py
+++ b/Robots/parse_html_beautifulsoup.py
@@ -67,6 +67,9 @@ def parse_html(filename, charset=None):
       head = root.html # Some sites put TITLE in HTML without HEAD
 
    _charset = root.originalEncoding
+   if _charset == "windows-1252": # Replace default
+      _charset = DEFAULT_CHARSET
+
    try:
       title = head.title.string.encode(_charset)
    except AttributeError: