2 Parser for Netscape Navigator's and Mozilla's bookmarks.html
4 Written by BroytMann. Copyright (C) 1997-2003 PhiloSoft Design
9 from m_lib.net.www.html import HTMLParser
10 from bkmk_objects import Folder, Bookmark, Ruler
# Debug flag: true when the BKMK_DEBUG environment variable is present,
# whatever its value.
# NOTE(review): dict.has_key() exists only in Python 2;
# `"BKMK_DEBUG" in os.environ` is the portable spelling.
13 DEBUG = os.environ.has_key("BKMK_DEBUG")
# Format a folder stack for debug output.
# Returns the collected items, each wrapped in single quotes and separated
# by spaces (e.g. 'a' 'b').
# NOTE(review): the statements that create and fill `l` are not visible
# here -- presumably it collects one name per stack entry; confirm.
19 def dump_names(folder_stack):
# `object` shadows the builtin of the same name for the rest of this function.
21 for object in folder_stack:
# The join string "' '" closes one quoted item and opens the next; the outer
# "'%s'" supplies the very first and very last quote.
24 return "'%s'" % "' '".join(l)
# Parser for bookmarks.html, built on m_lib's HTMLParser.
# The tag handlers below build Folder, Bookmark and Ruler objects as the
# corresponding tags are encountered.
32 class BkmkParser(HTMLParser):
# Run the base-class initialiser.
34 HTMLParser.__init__(self)
# Character-data callback: append the text to self.accumulator.
43 def handle_data(self, data):
# Decode the data using self.charset, then re-encode it with Python 2's
# default encoding (encode() is called without arguments).
# NOTE(review): the condition guarding this conversion is not visible here.
46 data = unicode(data, self.charset).encode()
47 self.accumulator += data
50 # Mozilla - get charset
# Handle <META>: when http-equiv is "content-type", take the document
# charset from the tag's content value and store it in self.charset.
51 def do_meta(self, attrs):
# attrs is iterated as (name, value) pairs.
55 for attrname, value in attrs:
57 if attrname == 'http-equiv':
# Lower-cased so the comparison with "content-type" below ignores case.
58 http_equiv = value.lower()
59 elif attrname == 'content':
62 if http_equiv == "content-type":
64 # extract charset from "text/html; charset=UTF-8"
# Keeps the piece that follows the first '='; raises IndexError when the
# content value contains no '=' at all.
65 self.charset = content.split('=')[1]
# <TITLE> handling: the tag is written into self.accumulator as literal
# text instead of being dropped.
70 def start_title(self, attrs):
71 self.accumulator += "<TITLE>"
# NOTE(review): the closing marker below presumably belongs to the matching
# end-of-title handler, whose def line is not visible here -- confirm.
74 self.accumulator += "</TITLE>"
# <H1> handling: creates the root folder and resets the parser's
# folder-tracking state to point at it.
78 def start_h1(self, attrs):
79 root_folder = Folder()
# The new folder becomes the current object, the root, the current folder,
# and the only entry on the folder stack.
80 self.current_object = root_folder
81 self.root_folder = root_folder
82 self.current_folder = root_folder
83 self.folder_stack = [root_folder]
# Whatever text was accumulated so far, stripped of surrounding whitespace,
# is kept as the root folder's header.
85 self.root_folder.header = self.accumulator.strip()
# NOTE(review): the remaining lines presumably belong to the end-of-H1
# handler (def line not visible here); they store the accumulated text as
# the root folder's name.
89 accumulator = self.accumulator
92 debug("Root folder name: `%s'" % accumulator)
93 self.root_folder.name = accumulator
# <H3> handling: opens a new sub-folder inside the current folder and makes
# it the current folder.
97 def start_h3(self, attrs):
# attrs is iterated as (name, value) pairs; only add_date is checked in the
# visible lines.
98 for attrname, value in attrs:
100 if attrname == 'add_date':
103 debug("New folder...")
104 folder = Folder(add_date)
105 self.current_object = folder
# Attach to the parent before descending: the parent is still
# self.current_folder at this point.
106 self.current_folder.append(folder)
107 self.folder_stack.append(folder) # push new folder
108 self.current_folder = folder
# NOTE(review): the remaining lines presumably belong to the end-of-H3
# handler (def line not visible here); they take the accumulated text,
# clear the accumulator, and store the text as the current folder's name.
112 accumulator = self.accumulator
113 self.accumulator = ''
115 debug("Folder name: `%s'" % accumulator)
116 self.current_folder.name = accumulator
# <A> handling: builds a Bookmark from the tag's attributes and appends it
# to the current folder.
120 def start_a(self, attrs):
# attrs is iterated as (name, value) pairs; each value is stripped of
# surrounding whitespace before use.
124 for attrname, value in attrs:
125 value = value.strip()
126 if attrname == 'href':
128 if attrname == 'add_date':
130 if attrname == 'last_visit':
132 if attrname == 'last_modified':
133 last_modified = value
135 debug("Bookmark points to: `%s'" % href)
136 bookmark = Bookmark(href, add_date, last_visit, last_modified)
137 self.current_object = bookmark
138 self.current_folder.append(bookmark)
# NOTE(review): the next lines presumably belong to the end-of-A handler
# (def line not visible here); they take the accumulated text, clear the
# accumulator, and use the text as the bookmark's name.
143 accumulator = self.accumulator
144 self.accumulator = ''
146 debug("Bookmark name: `%s'" % accumulator)
# The bookmark is looked up as the last item of the current folder rather
# than through self.current_object.
147 bookmark = self.current_folder[-1]
148 bookmark.name = accumulator
# NOTE(review): the lines below presumably belong to a separate method
# (def line not visible here) that moves accumulated text into the comment
# of the current object -- confirm.
152 accumulator = self.accumulator
155 self.accumulator = ''
157 current_object = self.current_object
# The stripped text is appended to any comment already present.
159 current_object.comment += accumulator.strip()
160 debug("Comment: `%s'" % current_object.comment)
# <DL> handling.
# NOTE(review): the body of start_dl is not visible here. The lines that
# follow presumably belong to the end-of-DL handler (def line not visible),
# which leaves the current folder by popping the folder stack -- confirm.
163 def start_dl(self, attrs):
173 debug("Folder stack: %s" % dump_names(self.folder_stack))
174 if self.folder_stack:
175 del self.folder_stack[-1] # pop last folder
# After the pop, the new top of the stack (if any) becomes the current folder.
176 if self.folder_stack:
177 self.current_folder = self.folder_stack[-1]
# The two messages below are only debug output; no exception is raised on
# these lines.
179 debug("FOLDER STACK is EMPTY!!! (1)")
181 debug("FOLDER STACK is EMPTY!!! (2)")
182 self.current_object = None
# NOTE(review): the lines below presumably belong to the close() override
# (def line not visible here): after the base class finishes, a non-empty
# folder stack is reported as an error.
186 HTMLParser.close(self)
187 if self.folder_stack:
# Python 2-only raise syntax; raises ValueError with the leftover stack in
# the message.
188 raise ValueError, "wrong folder stack: %s" % self.folder_stack
191 def do_dd(self, attrs):
# <HR> handling: a horizontal rule is stored as a Ruler item in the current
# folder.
198 def do_hr(self, attrs):
201 self.current_folder.append(Ruler())
# Clear the current object so it no longer points at a previous item.
202 self.current_object = None
# <BR> handling: the tag is kept as literal "<BR>" text in the accumulator.
207 def do_br(self, attrs):
208 self.accumulator += "<BR>"
211 # Allow < in the text
# Any start tag without a dedicated handler is written back into the
# accumulator as "<tag>"; its attributes are discarded.
212 def unknown_starttag(self, tag, attrs):
213 self.accumulator += "<%s>" % tag
216 # Do not allow unknown end tags
# Any end tag without a dedicated handler aborts parsing with
# NotImplementedError.
217 def unknown_endtag(self, tag):
218 raise NotImplementedError("Unknow end tag `%s'" % tag)