X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=bkmk_parser.py;h=7ce99e8412447cbba334291783149f80cc2c3e46;hb=bbcb4777fc62ff721ff60ebc1697ac61acbf0617;hp=37f5f7743d33e0fc48527832559677f8775ba66a;hpb=364bdf891fa30093391583c449ebc7cbab4ec2a5;p=bookmarks_db.git diff --git a/bkmk_parser.py b/bkmk_parser.py index 37f5f77..7ce99e8 100755 --- a/bkmk_parser.py +++ b/bkmk_parser.py @@ -1,11 +1,11 @@ """ Parser for Netscape Navigator's and Mozilla's bookmarks.html - Written by BroytMann. Copyright (C) 1997-2003 PhiloSoft Design + Written by BroytMann. Copyright (C) 1997-2005 PhiloSoft Design """ -import os +import sys, os from m_lib.net.www.html import HTMLParser from bkmk_objects import Folder, Bookmark, Ruler @@ -29,6 +29,8 @@ else: dump_names = debug +DEFAULT_CHARSET = None + class BkmkParser(HTMLParser): def __init__(self): HTMLParser.__init__(self) @@ -42,8 +44,8 @@ class BkmkParser(HTMLParser): def handle_data(self, data): if data: - if self.charset: - data = unicode(data, self.charset).encode() + if DEFAULT_CHARSET: + data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "replace") self.accumulator += data @@ -65,9 +67,21 @@ class BkmkParser(HTMLParser): self.charset = content.split('=')[1] except IndexError: pass + else: + global DEFAULT_CHARSET + DEFAULT_CHARSET = sys.getdefaultencoding() + if DEFAULT_CHARSET == "ascii": + try: + import locale + except ImportError: + pass + else: + DEFAULT_CHARSET = locale.getpreferredencoding() def start_title(self, attrs): + if DEFAULT_CHARSET: + self.accumulator += '\n' % DEFAULT_CHARSET self.accumulator += "" def end_title(self): @@ -93,7 +107,7 @@ class BkmkParser(HTMLParser): self.root_folder.name = accumulator - # Start next folder + # Start a folder def start_h3(self, attrs): for attrname, value in attrs: value = value.strip() @@ -116,24 +130,27 @@ class BkmkParser(HTMLParser): self.current_folder.name = accumulator - # Start bookmark + # Start a bookmark def start_a(self, attrs): last_visit = None last_modified = None + keyword = None for attrname, value in attrs: value = value.strip() - if attrname == 'href': + if attrname == "href": href = value - if attrname == 'add_date': + elif attrname == "add_date": add_date = value - if attrname == 'last_visit': + elif attrname == "last_visit": last_visit = value - if attrname == 'last_modified': + elif attrname == "last_modified": last_modified = value + elif attrname == "shortcuturl": + keyword = value debug("Bookmark points to: `%s'" % href) - bookmark = Bookmark(href, add_date, last_visit, last_modified) + bookmark = Bookmark(href, add_date, last_visit, last_modified, keyword or '') self.current_object = bookmark self.current_folder.append(bookmark) self.urls += 1