From: Oleg Broytman
Date: Fri, 13 Aug 2010 13:38:06 +0000 (+0000)
Subject: Moved lxml-based parser after BeautifulSoup - it doesn't accept charset.
X-Git-Tag: v4.5.3~95
X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=8bc6571d6eaba3a72ecb543c0fe9b95dbea31634;p=bookmarks_db.git

Moved lxml-based parser after BeautifulSoup - it doesn't accept charset.

git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@281 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23
---

diff --git a/Robots/parse_html.py b/Robots/parse_html.py
index 5e8061c..228a3ce 100755
--- a/Robots/parse_html.py
+++ b/Robots/parse_html.py
@@ -13,19 +13,19 @@ DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
 parsers = []
 
 try:
-    from parse_html_lxml import parse_html
+    import parse_html_beautifulsoup
+    parse_html_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
 except ImportError:
     pass
 else:
-    parsers.append(parse_html)
+    parsers.append(parse_html_beautifulsoup.parse_html)
 
 try:
-    import parse_html_beautifulsoup
-    parse_html_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
+    from parse_html_lxml import parse_html
 except ImportError:
     pass
 else:
-    parsers.append(parse_html_beautifulsoup.parse_html)
+    parsers.append(parse_html)
 
 try:
     from parse_html_htmlparser import parse_html