2 Parser for Netscape Navigator's bookmarks.html
4 Written by BroytMann, Jun 1997 - Jul 2003. Copyright (C) 1997-2003 PhiloSoft Design
9 from m_lib.net.www.html import HTMLParser
10 from bkmk_objects import Folder, Bookmark, Ruler
# Debug helper: build a printable list of names for ``folder_stack``.
# The names collected in ``l`` are joined with "' '" and the result is wrapped
# in single quotes, e.g. 'a' 'b'.
# NOTE(review): the embedded numbering jumps 17 -> 19 -> 22, so the lines that
# create ``l`` and fill it inside the loop are not visible in this excerpt;
# confirm against the full file which objects contribute a name.
# NOTE(review): the loop variable ``object`` shadows the builtin of that name.
17 def dump_names(folder_stack):
19 for object in folder_stack:
22 return "'" + string.join(l, "' '") + "'"
# Parser for Netscape Navigator's bookmarks.html, built on the project's
# ``HTMLParser`` (imported from ``m_lib.net.www.html``).
30 class BkmkParser(HTMLParser):
# Initialise the base parser.
# NOTE(review): the enclosing ``def`` line (numbering jumps 30 -> 32) and the
# statements following this call are not visible in this excerpt; other
# methods read ``self.urls``, ``self.objects`` and ``self.charset``, so they
# are presumably set up there — confirm.
32 HTMLParser.__init__(self)
# Collect character data: ``data`` is decoded using ``self.charset``,
# re-encoded by ``.encode()`` with no explicit codec, and appended to
# ``self.accumulator``.
# NOTE(review): numbering jumps 41 -> 44, so any guards around the conversion
# (for example for empty data or an unset charset) are not visible here.
41 def handle_data(self, data):
44 data = unicode(data, self.charset).encode()
45 self.accumulator = "%s%s" % (self.accumulator, data)
48 # Mozilla - get charset
# Scan the tag's ``attrs`` for ``http-equiv`` and ``content``; when the
# http-equiv value is "content-type", store the charset in ``self.charset``.
# NOTE(review): numbering jumps (49 -> 53, 57 -> 60, 60 -> 62 and after 63) show
# that the initial values of ``http_equiv``/``content``, the body of the
# ``content`` branch and whatever surrounds the charset extraction are not
# visible in this excerpt.
49 def do_meta(self, attrs):
53 for attrname, value in attrs:
# Attribute values are stripped of surrounding whitespace before use.
54 value = string.strip(value)
55 if attrname == 'http-equiv':
# Lower-cased so the comparison below is case-insensitive.
56 http_equiv = value.lower()
57 elif attrname == 'content':
60 if http_equiv == "content-type":
62 # extract charset from "text/html; charset=UTF-8"
# Takes the piece right after the first '='; indexing [1] fails with
# IndexError when ``content`` holds no '=' (handling, if any, is not visible).
63 self.charset = content.split('=')[1]
def start_title(self, attrs):
    """Keep the opening ``<TITLE>`` markup in the accumulated text.

    The literal ``<TITLE>`` is appended to ``self.accumulator``.  ``attrs``
    is accepted for the handler signature but is not used.
    """
    text_so_far = self.accumulator
    self.accumulator = "%s<TITLE>" % text_so_far
# Keeps the closing </TITLE> markup in the accumulated text.
# NOTE(review): the ``def`` line for this statement is not visible (numbering
# jumps 69 -> 72); presumably it is the closing-tag counterpart of
# ``start_title`` — confirm against the full file.
72 self.accumulator = "%s</TITLE>" % self.accumulator
# Start the root folder (presumably called for an opening <H1> tag — confirm).
# A fresh ``Folder`` becomes the root folder, the current object, the current
# folder and the only entry of the folder stack.
76 def start_h1(self, attrs):
77 root_folder = Folder()
78 self.current_object = root_folder
79 self.root_folder = root_folder
80 self.current_folder = root_folder
81 self.folder_stack = [root_folder]
# Everything accumulated before this point is kept as the root folder's header.
83 self.root_folder.header = self.accumulator
# NOTE(review): the lines after 83 are not visible in this excerpt (numbering
# resumes at 87), so it cannot be told from here whether the accumulator is
# reset before the heading text is collected.
# Uses the accumulated text as the name of the root folder.
# NOTE(review): the ``def`` line for these statements is not visible (numbering
# jumps 83 -> 87); presumably this is the closing-tag counterpart of
# ``start_h1`` — confirm. Lines 88-89 are also missing, so a reset of
# ``self.accumulator`` may exist but is not shown here.
87 accumulator = self.accumulator
90 debug("Root folder name: `%s'" % accumulator)
91 self.root_folder.name = accumulator
# Open a new sub-folder (presumably called for an opening <H3> tag — confirm).
# A ``Folder`` is created from ``add_date``, appended to the current folder,
# pushed onto the folder stack and made both the current folder and the
# current object; the object counter is incremented.
# NOTE(review): numbering jumps 98 -> 101, so the body of the ``add_date``
# branch (where ``add_date`` is bound) is not visible in this excerpt, nor is
# any default for a tag without that attribute.
95 def start_h3(self, attrs):
96 for attrname, value in attrs:
# Attribute values are stripped of surrounding whitespace before use.
97 value = string.strip(value)
98 if attrname == 'add_date':
101 debug("New folder...")
102 folder = Folder(add_date)
103 self.current_object = folder
104 self.current_folder.append(folder)
105 self.folder_stack.append(folder) # push new folder
106 self.current_folder = folder
107 self.objects = self.objects + 1
# Takes the accumulated text, clears the accumulator and uses the text as the
# name of the current folder.
# NOTE(review): the ``def`` line for these statements is not visible (numbering
# jumps 107 -> 110); presumably this is the closing-tag counterpart of
# ``start_h3`` — confirm.
110 accumulator = self.accumulator
111 self.accumulator = ''
113 debug("Folder name: `%s'" % accumulator)
114 self.current_folder.name = accumulator
# Create a bookmark from a link tag's attributes (presumably called for an
# opening <A> tag — confirm). A ``Bookmark`` built from ``href``, ``add_date``,
# ``last_visit`` and ``last_modified`` becomes the current object and is
# appended to the current folder; the URL and object counters are incremented.
# NOTE(review): numbering jumps (118 -> 122, 124 -> 126, 126 -> 128, 128 -> 130)
# show that the initial values of the four variables and the bodies of the
# ``href``, ``add_date`` and ``last_visit`` branches are not visible here.
118 def start_a(self, attrs):
122 for attrname, value in attrs:
# Attribute values are stripped of surrounding whitespace before use.
123 value = string.strip(value)
124 if attrname == 'href':
126 if attrname == 'add_date':
128 if attrname == 'last_visit':
130 if attrname == 'last_modified':
131 last_modified = value
133 debug("Bookmark points to: `%s'" % href)
134 bookmark = Bookmark(href, add_date, last_visit, last_modified)
135 self.current_object = bookmark
136 self.current_folder.append(bookmark)
137 self.urls = self.urls + 1
138 self.objects = self.objects + 1
# Takes the accumulated text, clears the accumulator and uses the text as the
# name of the last entry of the current folder.
# NOTE(review): the ``def`` line for these statements is not visible (numbering
# jumps 138 -> 141); presumably this is the closing-tag counterpart of
# ``start_a``, in which case the last entry is the bookmark appended there —
# confirm.
141 accumulator = self.accumulator
142 self.accumulator = ''
144 debug("Bookmark name: `%s'" % accumulator)
145 bookmark = self.current_folder[-1]
146 bookmark.name = accumulator
# Moves the accumulated text into the comment of the current object: the text
# is appended to ``current_object.comment`` and the accumulator is cleared.
# NOTE(review): the ``def`` line and its name are not visible (numbering jumps
# 146 -> 150), and lines 151-152 and 154 are missing, so any condition guarding
# these statements cannot be seen from here.
# NOTE(review): other visible code sets ``self.current_object`` to None; whether
# that case is excluded before line 156 cannot be told from this excerpt.
150 accumulator = self.accumulator
153 self.accumulator = ''
155 current_object = self.current_object
156 current_object.comment = current_object.comment + accumulator
157 debug("Comment: `%s'" % current_object.comment)
# Handler taking the tag's ``attrs`` (presumably called for an opening <DL>
# tag — confirm).
# NOTE(review): the body is not visible in this excerpt (numbering jumps
# 160 -> 170), so nothing can be said here about what it does.
160 def start_dl(self, attrs):
# Leaves the current folder: the top of the folder stack is removed and the
# new top, if any, becomes the current folder; the current object is cleared.
# NOTE(review): the ``def`` line for these statements is not visible (numbering
# jumps 160 -> 170); presumably this is the closing-tag counterpart of
# ``start_dl`` — confirm. Lines 175 and 177 are missing too; the two
# "EMPTY" debug messages presumably sit in ``else`` branches of the two
# stack checks.
170 debug("Folder stack: %s" % dump_names(self.folder_stack))
171 if self.folder_stack:
172 del self.folder_stack[-1] # pop last folder
173 if self.folder_stack:
174 self.current_folder = self.folder_stack[-1]
176 debug("FOLDER STACK is EMPTY!!! (1)")
178 debug("FOLDER STACK is EMPTY!!! (2)")
179 self.current_object = None
# Finishes parsing through the base class, then checks that no folder is left
# on the stack; a non-empty stack raises ValueError with the stack in the
# message.
# NOTE(review): the ``def`` line for these statements is not visible (numbering
# jumps 179 -> 183); presumably it overrides the base parser's ``close`` —
# confirm.
183 HTMLParser.close(self)
184 if self.folder_stack:
# Python 2 "raise Class, message" form.
185 raise ValueError, "wrong folder stack: %s" % self.folder_stack
# Handler taking the tag's ``attrs`` (presumably called for a <DD> tag —
# confirm).
# NOTE(review): the body is not visible in this excerpt (numbering jumps
# 188 -> 195), so nothing can be said here about what it does.
188 def do_dd(self, attrs):
# Add a separator (presumably called for an <HR> tag — confirm): a new
# ``Ruler`` is appended to the current folder, the current object is cleared
# and the object counter is incremented.
# NOTE(review): numbering jumps 195 -> 198, so the first statements of the body
# are not visible in this excerpt.
195 def do_hr(self, attrs):
198 self.current_folder.append(Ruler())
199 self.current_object = None
200 self.objects = self.objects + 1
def do_br(self, attrs):
    """Keep a line break as literal ``<BR>`` markup in the accumulated text.

    The literal ``<BR>`` is appended to ``self.accumulator``.  ``attrs`` is
    accepted for the handler signature but is not used.
    """
    text_so_far = self.accumulator
    self.accumulator = "%s<BR>" % text_so_far
# Allow < in the text
def unknown_starttag(self, tag, attrs):
    """Put a start tag back into the accumulated text as ``<tag>``.

    Only the tag name is re-emitted; ``attrs`` is not used, so any
    attributes of the tag are not reproduced.
    """
    text_so_far = self.accumulator
    self.accumulator = "%s<%s>" % (text_so_far, tag)
# Do not allow unknown end tags
def unknown_endtag(self, tag):
    """Refuse the end tag ``tag``: this hook always raises.

    Args:
        tag: name of the end tag being reported.

    Raises:
        NotImplementedError: always; the message names the offending tag.
    """
    # The message previously read "Unknow end tag"; only the spelling changed,
    # the exception type is the same.
    raise NotImplementedError("Unknown end tag `%s'" % tag)