From: Oleg Broytman Date: Thu, 14 Apr 2011 15:20:20 +0000 (+0000) Subject: Get default charset from m_lib, if available. X-Git-Tag: v4.5.3~45 X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=97aa08d47caad9eb70df496aa65e63b57b9cd5b6;p=bookmarks_db.git Get default charset from m_lib, if available. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@331 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- diff --git a/bkmk_parser.py b/bkmk_parser.py index e3b3db0..02b20d7 100644 --- a/bkmk_parser.py +++ b/bkmk_parser.py @@ -37,7 +37,10 @@ else: dump_names = debug -DEFAULT_CHARSET = None +try: + from m_lib.defenc import default_encoding as DEFAULT_CHARSET +except ImportError: + DEFAULT_CHARSET = None class BkmkParser(HTMLParser): def __init__(self): @@ -46,12 +49,12 @@ class BkmkParser(HTMLParser): self.urls = 0 self.objects = 0 - self.charset = "" + self.charset = None self.recode = None def handle_data(self, data): if data: - if DEFAULT_CHARSET: + if self.charset and DEFAULT_CHARSET: data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "xmlcharrefreplace") self.accumulator += data @@ -73,16 +76,6 @@ class BkmkParser(HTMLParser): self.charset = content.split('=')[1] except IndexError: pass - else: - global DEFAULT_CHARSET - DEFAULT_CHARSET = sys.getdefaultencoding() - if DEFAULT_CHARSET == "ascii": - try: - import locale - except ImportError: - pass - else: - DEFAULT_CHARSET = locale.getpreferredencoding() def start_title(self, attrs): if DEFAULT_CHARSET: