X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=bkmk_parser.py;h=02116ccf025c9edacd119362331b64174ce89767;hb=54576c927fe40509d146212dce65c555148a22c5;hp=729c5b301b7cc9fe518ebaf0c3a59ef079deac38;hpb=d8559164bc4acdf293175dec5bf027430e03dc18;p=bookmarks_db.git diff --git a/bkmk_parser.py b/bkmk_parser.py index 729c5b3..02116cc 100644 --- a/bkmk_parser.py +++ b/bkmk_parser.py @@ -3,19 +3,17 @@ This file is a part of Bookmarks database and Internet robot. """ -__version__ = "$Revision$"[11:-2] -__revision__ = "$Id$"[5:-2] -__date__ = "$Date$"[7:-2] __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 1997-2011 PhiloSoft Design" +__copyright__ = "Copyright (C) 1997-2012 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['BkmkParser'] -import sys, os +import os +from m_lib.defenc import default_encoding from m_lib.net.www.html import HTMLParser -from bkmk_objects import DEFAULT_CHARSET, Folder, Bookmark, Ruler +from bkmk_objects import Folder, Bookmark, Ruler DEBUG = os.environ.has_key("BKMK_DEBUG") @@ -49,8 +47,8 @@ class BkmkParser(HTMLParser): def handle_data(self, data): if data: - if self.charset and DEFAULT_CHARSET: - data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "xmlcharrefreplace") + if self.charset and default_encoding: + data = unicode(data, self.charset, "replace").encode(default_encoding, "xmlcharrefreplace") self.accumulator += data # Mozilla - get charset @@ -73,8 +71,8 @@ class BkmkParser(HTMLParser): pass def start_title(self, attrs): - if DEFAULT_CHARSET: - self.accumulator += '\n' % DEFAULT_CHARSET + if default_encoding: + self.accumulator += '\n' % default_encoding self.accumulator += "" def end_title(self): @@ -128,7 +126,7 @@ class BkmkParser(HTMLParser): add_date = None last_visit = None last_modified = None - keyword = None + keyword = '' icon = None charset = None @@ -151,7 +149,8 @@ class BkmkParser(HTMLParser): debug("Bookmark points to: `%s'" % href) bookmark = Bookmark(href, add_date, last_visit, last_modified, - keyword=keyword or '', icon=icon, charset=charset) + keyword=keyword, icon=icon, + charset=charset, parser_charset=self.charset or default_encoding) self.current_object = bookmark self.current_folder.append(bookmark) self.urls += 1