X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=bkmk_parser.py;h=2ec412c012c2e1c385bad0ad60349bdbb89e6f6f;hb=a01b9869e595d0c1cc5ebeb6f5a1da66d45c3d22;hp=d43674138e33f4f2eccc2b577a9ff2656c725cb9;hpb=a394f04fc041e42c9b2551a19770dc6e56c047b2;p=bookmarks_db.git diff --git a/bkmk_parser.py b/bkmk_parser.py old mode 100755 new mode 100644 index d436741..2ec412c --- a/bkmk_parser.py +++ b/bkmk_parser.py @@ -1,9 +1,14 @@ -""" - Parser for Netscape Navigator's and Mozilla's bookmarks.html +"""Parser for Netscape Navigator's and Mozilla's bookmarks.html - Written by BroytMann. Copyright (C) 1997-2005 PhiloSoft Design +This file is a part of Bookmarks database and Internet robot. """ +__version__ = "$Revision$"[11:-2] +__revision__ = "$Id$"[5:-2] +__date__ = "$Date$"[7:-2] +__author__ = "Oleg Broytman " +__copyright__ = "Copyright (C) 1997-2011 PhiloSoft Design" +__license__ = "GNU GPL" import sys, os from m_lib.net.www.html import HTMLParser @@ -45,7 +50,7 @@ class BkmkParser(HTMLParser): def handle_data(self, data): if data: if DEFAULT_CHARSET: - data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "replace") + data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "xmlcharrefreplace") self.accumulator += data @@ -80,6 +85,8 @@ class BkmkParser(HTMLParser): def start_title(self, attrs): + if DEFAULT_CHARSET: + self.accumulator += '\n' % DEFAULT_CHARSET self.accumulator += "" def end_title(self): @@ -107,13 +114,16 @@ class BkmkParser(HTMLParser): # Start a folder def start_h3(self, attrs): + last_modified = None for attrname, value in attrs: value = value.strip() if attrname == 'add_date': add_date = value + elif attrname == 'last_modified': + last_modified = value debug("New folder...") - folder = Folder(add_date) + folder = Folder(add_date, last_modified=last_modified) self.current_object = folder self.current_folder.append(folder) self.folder_stack.append(folder) # push new folder @@ -130,9 +140,12 @@ class BkmkParser(HTMLParser): # Start a bookmark def start_a(self, attrs): + add_date = None last_visit = None last_modified = None keyword = None + icon = None + charset = None for attrname, value in attrs: value = value.strip() @@ -146,9 +159,14 @@ class BkmkParser(HTMLParser): last_modified = value elif attrname == "shortcuturl": keyword = value + elif attrname == "icon": + icon = value + elif attrname == "last_charset": + charset = value debug("Bookmark points to: `%s'" % href) - bookmark = Bookmark(href, add_date, last_visit, last_modified, keyword or '') + bookmark = Bookmark(href, add_date, last_visit, last_modified, + keyword or '', '', icon, charset) self.current_object = bookmark self.current_folder.append(bookmark) self.urls += 1