X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;ds=sidebyside;f=bkmk_parser.py;h=4cc8897e23f7b5737f9203e5664ef988b459acb3;hb=ca8624faf5b9a31e76b7bf3d441f446c8b62c8f6;hp=327e0c45f2dd2beb56711cd2e42eb0747e13bfd7;hpb=1f4d6d3d0e3a55e3390ef0f0e20f2358baa57fc1;p=bookmarks_db.git
diff --git a/bkmk_parser.py b/bkmk_parser.py
old mode 100755
new mode 100644
index 327e0c4..4cc8897
--- a/bkmk_parser.py
+++ b/bkmk_parser.py
@@ -1,7 +1,7 @@
"""
Parser for Netscape Navigator's and Mozilla's bookmarks.html
- Written by BroytMann. Copyright (C) 1997-2003 PhiloSoft Design
+ Written by BroytMann. Copyright (C) 1997-2007 PhiloSoft Design
"""
@@ -45,7 +45,7 @@ class BkmkParser(HTMLParser):
def handle_data(self, data):
if data:
if DEFAULT_CHARSET:
- data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "replace")
+ data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "xmlcharrefreplace")
self.accumulator += data
@@ -70,9 +70,18 @@ class BkmkParser(HTMLParser):
else:
global DEFAULT_CHARSET
DEFAULT_CHARSET = sys.getdefaultencoding()
+ if DEFAULT_CHARSET == "ascii":
+ try:
+ import locale
+ except ImportError:
+ pass
+ else:
+ DEFAULT_CHARSET = locale.getpreferredencoding()
def start_title(self, attrs):
+ if DEFAULT_CHARSET:
+ self.accumulator += '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%s">\n' % DEFAULT_CHARSET
self.accumulator += "<TITLE>"
def end_title(self):
@@ -98,15 +107,18 @@ class BkmkParser(HTMLParser):
self.root_folder.name = accumulator
- # Start next folder
+ # Start a folder
def start_h3(self, attrs):
+ last_modified = None
for attrname, value in attrs:
value = value.strip()
if attrname == 'add_date':
add_date = value
+ elif attrname == 'last_modified':
+ last_modified = value
debug("New folder...")
- folder = Folder(add_date)
+ folder = Folder(add_date, last_modified=last_modified)
self.current_object = folder
self.current_folder.append(folder)
self.folder_stack.append(folder) # push new folder
@@ -121,24 +133,34 @@ class BkmkParser(HTMLParser):
self.current_folder.name = accumulator
- # Start bookmark
+ # Start a bookmark
def start_a(self, attrs):
last_visit = None
last_modified = None
+ keyword = None
+ icon = None
+ charset = None
for attrname, value in attrs:
value = value.strip()
- if attrname == 'href':
+ if attrname == "href":
href = value
- if attrname == 'add_date':
+ elif attrname == "add_date":
add_date = value
- if attrname == 'last_visit':
+ elif attrname == "last_visit":
last_visit = value
- if attrname == 'last_modified':
+ elif attrname == "last_modified":
last_modified = value
+ elif attrname == "shortcuturl":
+ keyword = value
+ elif attrname == "icon":
+ icon = value
+ elif attrname == "last_charset":
+ charset = value
debug("Bookmark points to: `%s'" % href)
- bookmark = Bookmark(href, add_date, last_visit, last_modified)
+ bookmark = Bookmark(href, add_date, last_visit, last_modified,
+ keyword or '', '', icon, charset)
self.current_object = bookmark
self.current_folder.append(bookmark)
self.urls += 1