-"""
- Parser for Netscape Navigator's and Mozilla's bookmarks.html
+"""Parser for Netscape Navigator's and Mozilla's bookmarks.html
- Written by BroytMann. Copyright (C) 1997-2007 PhiloSoft Design
+This file is a part of Bookmarks database and Internet robot.
"""
+__version__ = "$Revision$"[11:-2]
+__revision__ = "$Id$"[5:-2]
+__date__ = "$Date$"[7:-2]
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 1997-2011 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['BkmkParser']
+
import sys, os
from m_lib.net.www.html import HTMLParser
self.charset = ""
self.recode = None
-
def handle_data(self, data):
    """Append a chunk of character data to ``self.accumulator``.

    When both the document charset (``self.charset``) and the output
    charset (``DEFAULT_CHARSET``) are set, the chunk is decoded from the
    former (undecodable bytes are replaced) and re-encoded into the latter,
    with unencodable characters written as XML character references.
    Otherwise the chunk is appended unchanged.

    :param data: text found between tags; empty chunks are ignored.
    """
    if not data:
        return
    # ``self.charset`` can be an empty string (it is assigned "" elsewhere
    # in this file).  An empty name is not a valid codec and would make
    # unicode() raise LookupError, so recode only when a charset is known.
    if DEFAULT_CHARSET and self.charset:
        data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "xmlcharrefreplace")
    self.accumulator += data
-
# Mozilla - get charset
def do_meta(self, attrs):
http_equiv = ""
else:
DEFAULT_CHARSET = locale.getpreferredencoding()
-
def start_title(self, attrs):
if DEFAULT_CHARSET:
self.accumulator += '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%s">\n' % DEFAULT_CHARSET
def end_title(self):
    """Handle ``</TITLE>`` by appending the closing tag text to the accumulator."""
    closing_tag = "</TITLE>"
    self.accumulator += closing_tag
-
# Start root folder
def start_h1(self, attrs):
root_folder = Folder()
debug("Root folder name: `%s'" % accumulator)
self.root_folder.name = accumulator
-
# Start a folder
def start_h3(self, attrs):
last_modified = None
debug("Folder name: `%s'" % accumulator)
self.current_folder.name = accumulator
-
# Start a bookmark
def start_a(self, attrs):
+ add_date = None
last_visit = None
last_modified = None
keyword = None
bookmark = self.current_folder[-1]
bookmark.name = accumulator
-
def flush(self):
accumulator = self.accumulator
current_object.comment += accumulator.strip()
debug("Comment: `%s'" % current_object.comment)
-
def start_dl(self, attrs):
    """Handle an opening ``<DL>`` tag by flushing the accumulated text.

    :param attrs: tag attributes; not used.
    """
    self.flush()

# <DT> is handled by the very same function as <DL>.
do_dt = start_dl
-
# End of folder
def end_dl(self):
self.flush()
debug("FOLDER STACK is EMPTY!!! (2)")
self.current_object = None
-
def close(self):
    """Finish parsing and check that no folder was left open.

    Calls ``HTMLParser.close`` first, then inspects ``self.folder_stack``.

    :raises ValueError: if ``self.folder_stack`` is non-empty after the
        base parser has been closed; the message shows the stack.
    """
    HTMLParser.close(self)
    if self.folder_stack:
        # Call form of ``raise``: same exception type and message as the
        # former ``raise ValueError, "..."`` statement, but accepted by
        # both Python 2 and Python 3.  The 1-tuple keeps the formatting
        # correct even if the stack happens to be a tuple.
        raise ValueError("wrong folder stack: %s" % (self.folder_stack,))
-
def do_dd(self, attrs):
    """Accept a ``<DD>`` tag and do nothing with it.

    :param attrs: tag attributes; not used.
    """
    return None

# <P> is ignored by the very same function as <DD>.
do_p = do_dd
-
# Start ruler
def do_hr(self, attrs):
# Handle an <HR> tag.  ``attrs`` is accepted but not used in the lines
# shown here.
# NOTE(review): this listing has lost its indentation and may not show
# every line of the method body -- confirm against the full file.
# Flush first, before the current object is reset below.
self.flush()
# After the tag no object is current.
self.current_object = None
# Bump the object counter (presumably the <HR> itself is counted as an
# object -- verify against the rest of the class).
self.objects += 1
-
# BR in comment
def do_br(self, attrs):
    """Record a ``<BR>`` tag by appending its literal text to the accumulator.

    :param attrs: tag attributes; not used.
    """
    line_break = "<BR>"
    self.accumulator += line_break
-
# Allow < in the text
def unknown_starttag(self, tag, attrs):
    """Put an unrecognised opening tag back into the text as ``<tag>``.

    Only the tag name is re-emitted; ``attrs`` is not used.

    :param tag: name of the unrecognised tag.
    :param attrs: tag attributes; ignored.
    """
    restored = "<%s>" % tag
    self.accumulator += restored
-
# Do not allow unknown end tags
def unknown_endtag(self, tag):
    """Reject a closing tag that has no dedicated handler.

    :param tag: name of the unrecognised closing tag.
    :raises NotImplementedError: always; the message names the tag.
    """
    # The message previously read "Unknow end tag"; the spelling is fixed.
    raise NotImplementedError("Unknown end tag `%s'" % tag)