X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;ds=inline;f=bkmk_parser.py;h=37f5f7743d33e0fc48527832559677f8775ba66a;hb=e6c70a9132fbf75774a492ddc194da9f74b31e01;hp=f396e2e5aa9be64cd40e3db47ec8d19b8927bc3d;hpb=fb5c3b2b91aeeb615d6d6d890491af3fdff69556;p=bookmarks_db.git
diff --git a/bkmk_parser.py b/bkmk_parser.py
index f396e2e..37f5f77 100755
--- a/bkmk_parser.py
+++ b/bkmk_parser.py
@@ -1,16 +1,18 @@
"""
- Parser for Netscape Navigator's bookmarks.html
+ Parser for Netscape Navigator's and Mozilla's bookmarks.html
- Written by BroytMann, Jun 1997 - Jun 2002. Copyright (C) 1997-2002 PhiloSoft Design
+ Written by BroytMann. Copyright (C) 1997-2003 PhiloSoft Design
"""
-import string
-from m_lib.www.html import HTMLParser
+import os
+from m_lib.net.www.html import HTMLParser
from bkmk_objects import Folder, Bookmark, Ruler
-if __debug__:
+DEBUG = os.environ.has_key("BKMK_DEBUG")
+
+if DEBUG:
def debug(note):
print note
@@ -19,7 +21,7 @@ if __debug__:
for object in folder_stack:
if object.isFolder:
l.append(object.name)
- return "'" + string.join(l, "' '") + "'"
+ return "'%s'" % "' '".join(l)
else:
def debug(note):
@@ -42,7 +44,7 @@ class BkmkParser(HTMLParser):
if data:
if self.charset:
data = unicode(data, self.charset).encode()
- self.accumulator = "%s%s" % (self.accumulator, data)
+ self.accumulator += data
# Mozilla - get charset
@@ -51,7 +53,7 @@ class BkmkParser(HTMLParser):
content = ""
for attrname, value in attrs:
- value = string.strip(value)
+ value = value.strip()
if attrname == 'http-equiv':
http_equiv = value.lower()
elif attrname == 'content':
@@ -66,10 +68,10 @@ class BkmkParser(HTMLParser):
def start_title(self, attrs):
- self.accumulator = "%s
" % self.accumulator
+ self.accumulator += ""
def end_title(self):
- self.accumulator = "%s" % self.accumulator
+ self.accumulator += ""
# Start root folder
@@ -80,7 +82,7 @@ class BkmkParser(HTMLParser):
self.current_folder = root_folder
self.folder_stack = [root_folder]
- self.root_folder.header = self.accumulator
+ self.root_folder.header = self.accumulator.strip()
self.accumulator = ''
def end_h1(self):
@@ -94,7 +96,7 @@ class BkmkParser(HTMLParser):
# Start next folder
def start_h3(self, attrs):
for attrname, value in attrs:
- value = string.strip(value)
+ value = value.strip()
if attrname == 'add_date':
add_date = value
@@ -104,7 +106,7 @@ class BkmkParser(HTMLParser):
self.current_folder.append(folder)
self.folder_stack.append(folder) # push new folder
self.current_folder = folder
- self.objects = self.objects + 1
+ self.objects += 1
def end_h3(self):
accumulator = self.accumulator
@@ -116,8 +118,11 @@ class BkmkParser(HTMLParser):
# Start bookmark
def start_a(self, attrs):
+ last_visit = None
+ last_modified = None
+
for attrname, value in attrs:
- value = string.strip(value)
+ value = value.strip()
if attrname == 'href':
href = value
if attrname == 'add_date':
@@ -131,8 +136,8 @@ class BkmkParser(HTMLParser):
bookmark = Bookmark(href, add_date, last_visit, last_modified)
self.current_object = bookmark
self.current_folder.append(bookmark)
- self.urls = self.urls + 1
- self.objects = self.objects + 1
+ self.urls += 1
+ self.objects += 1
def end_a(self):
accumulator = self.accumulator
@@ -150,8 +155,9 @@ class BkmkParser(HTMLParser):
self.accumulator = ''
current_object = self.current_object
- current_object.comment = current_object.comment + accumulator
- debug("Comment: `%s'" % current_object.comment)
+ if current_object:
+ current_object.comment += accumulator.strip()
+ debug("Comment: `%s'" % current_object.comment)
def start_dl(self, attrs):
@@ -194,17 +200,17 @@ class BkmkParser(HTMLParser):
debug("Ruler")
self.current_folder.append(Ruler())
self.current_object = None
- self.objects = self.objects + 1
+ self.objects += 1
# BR in comment
def do_br(self, attrs):
- self.accumulator = "%s
" % self.accumulator
+ self.accumulator += "
"
# Allow < in the text
def unknown_starttag(self, tag, attrs):
- self.accumulator = "%s<%s>" % (self.accumulator, tag)
+ self.accumulator += "<%s>" % tag
# Do not allow unknow end tags