"""
Parser for Netscape Navigator's and Mozilla's bookmarks.html
- Written by BroytMann. Copyright (C) 1997-2003 PhiloSoft Design
+ Written by BroytMann. Copyright (C) 1997-2005 PhiloSoft Design
"""
-import os
+import sys, os
from m_lib.net.www.html import HTMLParser
from bkmk_objects import Folder, Bookmark, Ruler
dump_names = debug
+DEFAULT_CHARSET = None
+
class BkmkParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
# Append a chunk of character data to self.accumulator, recoding it first
# when a charset is known.
# NOTE(review): this span is a diff hunk -- lines starting with "-" are the
# old code, lines starting with "+" are their replacement.
def handle_data(self, data):
if data:
# Old code: when self.charset was set, decode from that charset and
# re-encode with a bare .encode(), i.e. the interpreter's default codec.
- if self.charset:
- data = unicode(data, self.charset).encode()
# New code: gated on the module-level DEFAULT_CHARSET. Decodes from
# self.charset and re-encodes to DEFAULT_CHARSET; both steps pass
# "replace" so unconvertible characters are substituted, not raised on.
# NOTE(review): the guard tests DEFAULT_CHARSET but the decode still uses
# self.charset -- confirm self.charset is always set whenever
# DEFAULT_CHARSET is, otherwise unicode() receives an unset charset.
+ if DEFAULT_CHARSET:
+ data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "replace")
self.accumulator += data
self.charset = content.split('=')[1]
except IndexError:
pass
+ else:
+ global DEFAULT_CHARSET
+ DEFAULT_CHARSET = sys.getdefaultencoding()
+ if DEFAULT_CHARSET == "ascii":
+ try:
+ import locale
+ except ImportError:
+ pass
+ else:
+ DEFAULT_CHARSET = locale.getpreferredencoding()
def start_title(self, attrs):
self.root_folder.name = accumulator
- # Start next folder
+ # Start a folder
def start_h3(self, attrs):
for attrname, value in attrs:
value = value.strip()
self.current_folder.name = accumulator
- # Start bookmark
+ # Start a bookmark
def start_a(self, attrs):
last_visit = None
last_modified = None
+ keyword = None
for attrname, value in attrs:
value = value.strip()
- if attrname == 'href':
+ if attrname == "href":
href = value
- if attrname == 'add_date':
+ elif attrname == "add_date":
add_date = value
- if attrname == 'last_visit':
+ elif attrname == "last_visit":
last_visit = value
- if attrname == 'last_modified':
+ elif attrname == "last_modified":
last_modified = value
+ elif attrname == "shortcuturl":
+ keyword = value
debug("Bookmark points to: `%s'" % href)
- bookmark = Bookmark(href, add_date, last_visit, last_modified)
+ bookmark = Bookmark(href, add_date, last_visit, last_modified, keyword or '')
self.current_object = bookmark
self.current_folder.append(bookmark)
self.urls += 1