X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;ds=sidebyside;f=bkmk_parser.py;h=3f2182cf4c15e3f73631874d54aada5ff4ac55e0;hb=99128bb510fe0114d77dd8365997eff53a6ab8de;hp=dfe7ce2f463ef783079dd996b739075dce6ae073;hpb=33c634db52c188f2d531dc896079f0370c40583d;p=bookmarks_db.git diff --git a/bkmk_parser.py b/bkmk_parser.py old mode 100755 new mode 100644 index dfe7ce2..3f2182c --- a/bkmk_parser.py +++ b/bkmk_parser.py @@ -1,7 +1,7 @@ """ Parser for Netscape Navigator's and Mozilla's bookmarks.html - Written by BroytMann. Copyright (C) 1997-2004 PhiloSoft Design + Written by Broytman. Copyright (C) 1997-2008 PhiloSoft Design """ @@ -45,7 +45,7 @@ class BkmkParser(HTMLParser): def handle_data(self, data): if data: if DEFAULT_CHARSET: - data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "replace") + data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "xmlcharrefreplace") self.accumulator += data @@ -70,9 +70,18 @@ class BkmkParser(HTMLParser): else: global DEFAULT_CHARSET DEFAULT_CHARSET = sys.getdefaultencoding() + if DEFAULT_CHARSET == "ascii": + try: + import locale + except ImportError: + pass + else: + DEFAULT_CHARSET = locale.getpreferredencoding() def start_title(self, attrs): + if DEFAULT_CHARSET: + self.accumulator += '\n' % DEFAULT_CHARSET self.accumulator += "