# Rewrite a "windows-NNNN" style encoding name to its "cpNNNN" spelling
# (e.g. "windows-1251" -> "cp1251"); names not containing "windows-" are
# left unchanged.  Assumes default_encoding is a string defined earlier.
# NOTE(review): str.replace is case-sensitive, so "Windows-1251" would not be
# rewritten -- confirm default_encoding is always lower-case.
current_charset = default_encoding.replace("windows-", "cp")
DEFAULT_CHARSET = "cp1251" # Default charset: the Windows code page for Russian Cyrillic
-from parse_html_htmlparser import parse_html as _parse_html
-
-
-class HTMLParser(object):
- def __init__(self, charset=None):
- _HTMLParser.__init__(self)
- self.charset = charset
- self.meta_charset = 0
- self.title = ''
- self.refresh = ''
- self.icon = None
+try:
+ from parse_html_beautifulsoup import parse_html as _parse_html
+except ImportError:
+ from parse_html_htmlparser import parse_html as _parse_html
import re
# Script entry point: call parse_html on the first command-line argument and
# print the charset, title and refresh attributes of the object it returns.
# NOTE(review): this fragment is in patch form -- the "-" line is the old call
# and the "+" line is its replacement, which additionally passes
# current_charset as the second argument.
if __name__ == '__main__':
import sys
- parser = parse_html(sys.argv[1])
+ parser = parse_html(sys.argv[1], current_charset)
print parser.charset
print parser.title
print parser.refresh