2 Parser for Netscape Navigator's bookmarks.html
4 Written by BroytMann, Jun 1997 - Jun 2002. Copyright (C) 1997-2002 PhiloSoft Design
9 from m_lib.www.html import HTMLParser
10 from bkmk_objects import Folder, Bookmark, Ruler
# Render a folder stack as a single string of single-quoted items separated
# by spaces, e.g. 'a' 'b'.
# NOTE(review): the loop variable `object` shadows the builtin of the same name.
17 def dump_names(folder_stack):
19 for object in folder_stack:
# NOTE(review): `l` is presumably the list of names gathered by the loop
# above -- confirm against the full function body.
22 return "'" + string.join(l, "' '") + "'"
30 class BkmkParser(HTMLParser):
32 HTMLParser.__init__(self)
# Character-data handler: appends the text it receives to self.accumulator.
41 def handle_data(self, data):
# Decode the raw text using self.charset, then re-encode it with the
# interpreter's default encoding (encode() is called without arguments).
# NOTE(review): presumably only meaningful when self.charset is set -- confirm.
44 data = unicode(data, self.charset).encode()
# Concatenate onto whatever text has been collected so far.
45 self.accumulator = "%s%s" % (self.accumulator, data)
48 # Mozilla - get charset
# Handle a <META> tag: when it declares http-equiv="content-type", store the
# charset named in its content value on self.charset.
49 def do_meta(self, attrs):
# attrs is iterated as (name, value) pairs; values are stripped of
# surrounding whitespace before use.
53 for attrname, value in attrs:
54 value = string.strip(value)
55 if attrname == 'http-equiv':
# Lower-cased so the comparison with "content-type" below is case-insensitive.
56 http_equiv = value.lower()
57 elif attrname == 'content':
60 if http_equiv == "content-type":
62 # extract charset from "text/html; charset=UTF-8"
# NOTE(review): split('=')[1] takes the text between the first and second
# '='; it would raise IndexError if content had no '=' -- verify that this
# case is guarded.
63 self.charset = content.split('=')[1]
def start_title(self, attrs):
    """Record the opening of a <TITLE> element.

    The literal text ``<TITLE>`` is appended to ``self.accumulator``.
    ``attrs`` is accepted to satisfy the handler signature and is not used.
    """
    opening_tag = "<TITLE>"
    self.accumulator = "%s%s" % (self.accumulator, opening_tag)
72 self.accumulator = "%s</TITLE>" % self.accumulator
# Handle <H1>: create the root folder and make it the parser's starting point.
76 def start_h1(self, attrs):
77 root_folder = Folder()
# The new folder becomes the current object, the root and the current folder.
78 self.current_object = root_folder
79 self.root_folder = root_folder
80 self.current_folder = root_folder
# The folder stack is (re)initialised to hold only the root folder.
81 self.folder_stack = [root_folder]
# Text accumulated before <H1> is stored as the root folder's header.
83 self.root_folder.header = self.accumulator
87 accumulator = self.accumulator
90 debug("Root folder name: `%s'" % accumulator)
91 self.root_folder.name = accumulator
# Handle <H3>: open a new folder nested inside the current folder.
95 def start_h3(self, attrs):
# attrs is iterated as (name, value) pairs; values are stripped of
# surrounding whitespace.
96 for attrname, value in attrs:
97 value = string.strip(value)
98 if attrname == 'add_date':
101 debug("New folder...")
# NOTE(review): add_date is presumably bound from the 'add_date' attribute in
# the loop above; confirm it has a value when the tag lacks that attribute.
102 folder = Folder(add_date)
103 self.current_object = folder
# Attach the folder to its parent before descending into it.
104 self.current_folder.append(folder)
105 self.folder_stack.append(folder) # push new folder
106 self.current_folder = folder
# Folders count towards the running total of parsed objects.
107 self.objects = self.objects + 1
110 accumulator = self.accumulator
111 self.accumulator = ''
113 debug("Folder name: `%s'" % accumulator)
114 self.current_folder.name = accumulator
# Handle <A>: create a Bookmark from the tag's attributes and add it to the
# current folder.
118 def start_a(self, attrs):
# attrs is iterated as (name, value) pairs; values are stripped of
# surrounding whitespace.
119 for attrname, value in attrs:
120 value = string.strip(value)
121 if attrname == 'href':
123 if attrname == 'add_date':
125 if attrname == 'last_visit':
127 if attrname == 'last_modified':
128 last_modified = value
# NOTE(review): href, add_date and last_visit are presumably bound in the loop
# the same way as last_modified; confirm each has a value when its attribute
# is absent from the tag.
130 debug("Bookmark points to: `%s'" % href)
131 bookmark = Bookmark(href, add_date, last_visit, last_modified)
132 self.current_object = bookmark
133 self.current_folder.append(bookmark)
# A bookmark counts both as a URL and as a parsed object.
134 self.urls = self.urls + 1
135 self.objects = self.objects + 1
138 accumulator = self.accumulator
139 self.accumulator = ''
141 debug("Bookmark name: `%s'" % accumulator)
142 bookmark = self.current_folder[-1]
143 bookmark.name = accumulator
147 accumulator = self.accumulator
150 self.accumulator = ''
152 current_object = self.current_object
153 current_object.comment = current_object.comment + accumulator
154 debug("Comment: `%s'" % current_object.comment)
157 def start_dl(self, attrs):
167 debug("Folder stack: %s" % dump_names(self.folder_stack))
168 if self.folder_stack:
169 del self.folder_stack[-1] # pop last folder
170 if self.folder_stack:
171 self.current_folder = self.folder_stack[-1]
173 debug("FOLDER STACK is EMPTY!!! (1)")
175 debug("FOLDER STACK is EMPTY!!! (2)")
176 self.current_object = None
180 HTMLParser.close(self)
181 if self.folder_stack:
182 raise ValueError, "wrong folder stack: %s" % self.folder_stack
185 def do_dd(self, attrs):
# Handle <HR>: add a Ruler entry to the current folder.
192 def do_hr(self, attrs):
195 self.current_folder.append(Ruler())
# current_object is cleared after the ruler is added.
196 self.current_object = None
# Rulers count towards the running total of parsed objects.
197 self.objects = self.objects + 1
def do_br(self, attrs):
    """Keep a line break as literal markup in the collected text.

    Appends the text ``<BR>`` to ``self.accumulator``. ``attrs`` is accepted
    to satisfy the handler signature and is not used.
    """
    line_break = "<BR>"
    self.accumulator = "%s%s" % (self.accumulator, line_break)
205 # Allow < in the text
def unknown_starttag(self, tag, attrs):
    """Keep an unrecognised opening tag as literal text.

    Appends ``<tag>`` (the tag name only; ``attrs`` is not included) to
    ``self.accumulator`` so that a ``<`` in the text is carried through.
    """
    literal_tag = "<%s>" % tag
    self.accumulator = "%s%s" % (self.accumulator, literal_tag)
210 # Do not allow unknown end tags
def unknown_endtag(self, tag):
    """Reject any closing tag that has no dedicated handler.

    Raises:
        NotImplementedError: always, with a message naming ``tag``.
    """
    message = "Unknow end tag `%s'" % tag
    raise NotImplementedError(message)