X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=bkmk_parser.py;h=d4b6a2f32e05d6a8c3676f1415afa7ef1f441dfd;hb=3c9507aec371fb56916d3a91e3c41efc29d40900;hp=e3b3db02532567aef8060c4546f1f3a6af6039e2;hpb=c9cfb54ef36e77aee7187e4533f063d407073ad8;p=bookmarks_db.git diff --git a/bkmk_parser.py b/bkmk_parser.py index e3b3db0..d4b6a2f 100644 --- a/bkmk_parser.py +++ b/bkmk_parser.py @@ -13,7 +13,8 @@ __license__ = "GNU GPL" __all__ = ['BkmkParser'] -import sys, os +import os +from m_lib.defenc import default_encoding from m_lib.net.www.html import HTMLParser from bkmk_objects import Folder, Bookmark, Ruler @@ -37,8 +38,6 @@ else: dump_names = debug -DEFAULT_CHARSET = None - class BkmkParser(HTMLParser): def __init__(self): HTMLParser.__init__(self) @@ -46,13 +45,13 @@ class BkmkParser(HTMLParser): self.urls = 0 self.objects = 0 - self.charset = "" + self.charset = None self.recode = None def handle_data(self, data): if data: - if DEFAULT_CHARSET: - data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "xmlcharrefreplace") + if self.charset and default_encoding: + data = unicode(data, self.charset, "replace").encode(default_encoding, "xmlcharrefreplace") self.accumulator += data # Mozilla - get charset @@ -73,20 +72,10 @@ class BkmkParser(HTMLParser): self.charset = content.split('=')[1] except IndexError: pass - else: - global DEFAULT_CHARSET - DEFAULT_CHARSET = sys.getdefaultencoding() - if DEFAULT_CHARSET == "ascii": - try: - import locale - except ImportError: - pass - else: - DEFAULT_CHARSET = locale.getpreferredencoding() def start_title(self, attrs): - if DEFAULT_CHARSET: - self.accumulator += '\n' % DEFAULT_CHARSET + if default_encoding: + self.accumulator += '\n' % default_encoding self.accumulator += "" def end_title(self): @@ -140,7 +129,7 @@ class BkmkParser(HTMLParser): add_date = None last_visit = None last_modified = None - keyword = None + keyword = '' icon = None charset = None @@ -163,7 +152,8 @@ class BkmkParser(HTMLParser): debug("Bookmark points to: `%s'" % href) bookmark = Bookmark(href, add_date, last_visit, last_modified, - keyword or '', '', icon, charset) + keyword=keyword, icon=icon, + charset=charset, parser_charset=self.charset or default_encoding) self.current_object = bookmark self.current_folder.append(bookmark) self.urls += 1