From 8bc6571d6eaba3a72ecb543c0fe9b95dbea31634 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Fri, 13 Aug 2010 13:38:06 +0000 Subject: [PATCH] Moved lxml-based parser after BeautifulSoup - it doesn't accept charset. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@281 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- Robots/parse_html.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Robots/parse_html.py b/Robots/parse_html.py index 5e8061c..228a3ce 100755 --- a/Robots/parse_html.py +++ b/Robots/parse_html.py @@ -13,19 +13,19 @@ DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic parsers = [] try: - from parse_html_lxml import parse_html + import parse_html_beautifulsoup + parse_html_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET except ImportError: pass else: - parsers.append(parse_html) + parsers.append(parse_html_beautifulsoup.parse_html) try: - import parse_html_beautifulsoup - parse_html_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET + from parse_html_lxml import parse_html except ImportError: pass else: - parsers.append(parse_html_beautifulsoup.parse_html) + parsers.append(parse_html) try: from parse_html_htmlparser import parse_html -- 2.39.5