git.phdru.name Git - bookmarks_db.git/commitdiff
Moved lxml-based parser after BeautifulSoup - it doesn't accept charset.
author: Oleg Broytman <phd@phdru.name>
Fri, 13 Aug 2010 13:38:06 +0000 (13:38 +0000)
committer: Oleg Broytman <phd@phdru.name>
Fri, 13 Aug 2010 13:38:06 +0000 (13:38 +0000)
git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@281 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23

Robots/parse_html.py

index 5e8061c793e307b0fad3d1163bf3b122884d185c..228a3ceaddff6c0fe1acc8af64a374f348c5bb27 100755 (executable)
@@ -13,19 +13,19 @@ DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
 parsers = []
 
 try:
-   from parse_html_lxml import parse_html
+   import parse_html_beautifulsoup
+   parse_html_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
 except ImportError:
    pass
 else:
-    parsers.append(parse_html)
+   parsers.append(parse_html_beautifulsoup.parse_html)
 
 try:
-   import parse_html_beautifulsoup
-   parse_html_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
+   from parse_html_lxml import parse_html
 except ImportError:
    pass
 else:
-   parsers.append(parse_html_beautifulsoup.parse_html)
+    parsers.append(parse_html)
 
 try:
    from parse_html_htmlparser import parse_html