This file is a part of Bookmarks database and Internet robot.
"""
-__version__ = "$Revision$"[11:-2]
-__revision__ = "$Id$"[5:-2]
-__date__ = "$Date$"[7:-2]
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2011 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2012 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['BkmkParser']
-import sys, os
+import os
+from m_lib.defenc import default_encoding
from m_lib.net.www.html import HTMLParser
from bkmk_objects import Folder, Bookmark, Ruler
dump_names = debug
-DEFAULT_CHARSET = None
-
class BkmkParser(HTMLParser):
def __init__(self):
# Initialise the HTMLParser base class, then reset this parser's own state.
HTMLParser.__init__(self)
self.urls = 0
self.objects = 0
# This change replaces the initial "charset not known yet" value "" with None;
# both are falsy, so truthiness checks on self.charset behave the same.
- self.charset = ""
+ self.charset = None
self.recode = None
def handle_data(self, data):
# Append a chunk of character data to self.accumulator, recoding it first
# when the needed encodings are known.  Empty chunks are ignored.
if data:
# Removed: the recoding was guarded only by the module-level DEFAULT_CHARSET
# and did not check that self.charset had been set.
- if DEFAULT_CHARSET:
- data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "xmlcharrefreplace")
# Added: recode only when both the source charset (self.charset) and the
# target default_encoding (imported from m_lib.defenc) are truthy.
# Decoding uses the "replace" error handler; encoding uses
# "xmlcharrefreplace", so characters the target encoding cannot represent
# are written as XML character references.
# NOTE(review): unicode() is the Python 2 builtin - this code is Python 2 only.
+ if self.charset and default_encoding:
+ data = unicode(data, self.charset, "replace").encode(default_encoding, "xmlcharrefreplace")
self.accumulator += data
# Mozilla - get charset
self.charset = content.split('=')[1]
except IndexError:
pass
- else:
- global DEFAULT_CHARSET
- DEFAULT_CHARSET = sys.getdefaultencoding()
- if DEFAULT_CHARSET == "ascii":
- try:
- import locale
- except ImportError:
- pass
- else:
- DEFAULT_CHARSET = locale.getpreferredencoding()
def start_title(self, attrs):
# Open the title in the accumulated output.  When an output encoding is
# known, a META Content-Type tag declaring that charset is emitted first.
# The attrs argument is not used in the lines shown here.
# Removed: the charset came from the module-level DEFAULT_CHARSET global.
- if DEFAULT_CHARSET:
- self.accumulator += '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%s">\n' % DEFAULT_CHARSET
# Added: the charset now comes from default_encoding (m_lib.defenc).
+ if default_encoding:
+ self.accumulator += '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%s">\n' % default_encoding
self.accumulator += "<TITLE>"
def end_title(self):
add_date = None
last_visit = None
last_modified = None
- keyword = None
+ keyword = ''
icon = None
charset = None
debug("Bookmark points to: `%s'" % href)
bookmark = Bookmark(href, add_date, last_visit, last_modified,
- keyword or '', '', icon, charset)
+ keyword=keyword, icon=icon,
+ charset=charset, parser_charset=self.charset or default_encoding)
self.current_object = bookmark
self.current_folder.append(bookmark)
self.urls += 1