+"""
+ Bookmarks parsers
+
+ Written by BroytMann, Mar 1997 - Feb 2000. Copyright (C) 1997-2000 PhiloSoft Design
+"""
+
+
+import os, string, shutil
+from htmllib import HTMLParser
+
+
+class BookmarksParser(HTMLParser): # Parser for Navigator's bookmarks (abstract class)
+ def __init__(self, formatter, verbose=0):
+ HTMLParser.__init__(self, formatter, verbose)
+ self.urls_no = 0 # cross-reference counter
+ self.record_no = 1 # record counter
+ self.outfile = None # output file
+ self.level = 0 # Indentation level
+ self.flag_out = 0 # Is it time to flush?
+ self.saved_data = ''
+ self.saved_anchor = None
+ self.saved_folder = None
+ self.saved_ruler = None
+
+
+ def flush(self):
+ if not self.outfile:
+ return
+
+ record_flushed = 0
+
+ if self.saved_anchor:
+ name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
+ self.saved_anchor = (name, href, add_date, last_visit, last_modified, comment + self.saved_data)
+ self.flush_anchor()
+ self.saved_data = ''
+ record_flushed = 1
+ self.saved_anchor = None
+
+ if self.saved_folder:
+ name, add_date, comment = self.saved_folder
+ self.saved_folder = (name, add_date, comment + self.saved_data)
+ self.flush_folder()
+ self.saved_data = ''
+ record_flushed = 1
+ self.saved_folder = None
+
+ if self.saved_ruler:
+ self.flush_ruler()
+ record_flushed = 1
+ self.saved_ruler = None
+
+ if record_flushed:
+ self.record_no = self.record_no + 1
+
+ if self.saved_data <> '': # This may occur after ampersand
+ self.flag_out = 0
+
+
+
+
+ def close(self):
+ HTMLParser.close(self)
+
+ if self.outfile:
+ self.outfile.close()
+
+ if self.level <> 0:
+ print "Bad HTML: <DL> and </DL> mismatch; level=%d" % self.level
+
+
+ def handle_data(self, data):
+ if not self.outfile:
+ return
+
+ if data and (data[0] == '&'): # Ampersand parsed by SGMLlib
+ self.flag_out = 0
+
+ if self.flag_out == 2: # Process comment after <DD> or <HR>
+ if self.saved_anchor:
+ name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
+ self.saved_anchor = (name, href, add_date, last_visit, last_modified, comment + data)
+ data = '' # Used
+
+ if self.saved_folder:
+ name, add_date, comment = self.saved_folder
+ self.saved_folder = (name, add_date, comment + data)
+ data = '' # Used
+
+ self.flag_out = 0
+
+ if self.flag_out == 1:
+ self.flush()
+
+ if data and (data[0] <> '&') and (self.flag_out == 0):
+ self.flag_out = 1 # Set flag (to flush data on next call)
+
+ if data:
+ self.saved_data = self.saved_data + data
+
+
+ def anchor_bgn(self, href, add_date, last_visit, last_modified):
+ self.flush()
+ self.anchor = (href, add_date, last_visit, last_modified)
+
+
+ def anchor_end(self):
+ if self.anchor:
+ href, add_date, last_visit, last_modified = self.anchor
+ self.anchor = None
+ self.urls_no = self.urls_no + 1
+
+ self.saved_anchor = (self.saved_data, href, add_date, last_visit, last_modified, '')
+ self.saved_data = '' # Used
+
+
+ def start_a(self, attrs):
+ href = ''
+ add_date = ''
+ last_visit = ''
+ last_modified = ''
+
+ for attrname, value in attrs:
+ value = string.strip(value)
+ if attrname == 'href':
+ href = value
+ if attrname == 'add_date':
+ add_date = value
+ if attrname == 'last_visit':
+ last_visit = value
+ if attrname == 'last_modified':
+ last_modified = value
+
+ self.anchor_bgn(href, add_date, last_visit, last_modified)
+
+
+ def start_h3(self, attrs): # Navigator marks folders with <H3> tags
+ self.flush()
+ add_date = ''
+
+ for attrname, value in attrs:
+ value = string.strip(value)
+ if attrname == 'add_date':
+ add_date = value
+
+ self.saved_folder = ('', add_date, '')
+ self.flag_out = 0
+
+
+ def end_h3(self): # End of folder
+ name, add_date, comment = self.saved_folder
+ self.saved_folder = (name + self.saved_data, add_date, comment)
+ self.saved_data = '' # Used
+
+
+ def start_dl(self, attrs):
+ self.flush()
+
+ if not self.outfile: # We are starting output after 1st <DL> tag to skip header
+ self.open_outfile()
+
+ self.level = self.level + 1
+
+
+ def end_dl(self):
+ self.flush()
+ self.level = self.level - 1
+
+
+ def do_dd(self, attrs):
+ if self.outfile:
+ self.flag_out = 2 # Set flag to signal "comment starting"
+
+
+ def do_br(self, attrs):
+ if self.outfile:
+ self.saved_data = self.saved_data + "<BR>" # Add <BR>...
+ self.flag_out = 0 # ...and next line of comment to saved comment
+
+
+ def do_hr(self, attrs):
+ if self.outfile:
+ self.flush()
+ self.saved_ruler = 1
+
+
+ def handle_charref(self, name):
+ if self.outfile:
+ self.flag_out = 0
+ self.saved_data = "%s&%c" % (self.saved_data, chr(name))
+
+
+ def handle_entityref(self, name):
+ if self.outfile:
+ self.flag_out = 0
+ if self.entitydefs.has_key(name): # If it is one of the standard SGML entities - close it with semicolon
+ x = ';'
+ else:
+ x = ''
+ self.saved_data = "%s&%s%s" % (self.saved_data, name, x)
+
+
+ def open_outfile(self):
+ self.outfile = open("bookmarks.tmp", 'w')
+
+
+class Bookmarks2Text(BookmarksParser):
+ def flush_anchor(self):
+ self.outfile.write(" "*(self.level-1) + str(self.saved_anchor) + '\n')
+
+
+ def flush_folder(self):
+ self.outfile.write(" "*(self.level-1) + str(self.saved_folder) + '\n')
+
+
+ def flush_ruler(self):
+ self.outfile.write(" "*(self.level-1) + "----------\n")
+
+
+ def __del__(self):
+ shutil.copy("bookmarks.tmp", "bookmarks.txt")
+ os.unlink("bookmarks.tmp")
+
+
+class Bookmarks2Flad(BookmarksParser):
+ def __init__(self, formatter, verbose=0):
+ BookmarksParser.__init__(self, formatter, verbose)
+ self.flush_record = 0
+
+
+ def flush(self):
+ if not self.outfile:
+ return
+
+ record_flushed = 0
+
+ if self.saved_anchor or self.saved_folder or self.saved_ruler or self.saved_data:
+ if self.flush_record:
+ self.outfile.write('\n')
+ else:
+ self.flush_record = 1
+
+ BookmarksParser.flush(self)
+
+
+ def flush_anchor(self):
+ name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
+ self.outfile.write("""Level: %d
+Title: %s
+URL: %s
+AddDate: %s
+LastVisit: %s
+LastModified: %s
+Comment: %s
+""" % (self.level, name, href, add_date, last_visit, last_modified, comment))
+
+ def flush_folder(self):
+ name, add_date, comment = self.saved_folder
+ self.outfile.write("""Level: %d
+Folder: %s
+AddDate: %s
+Comment: %s
+""" % (self.level, name, add_date, comment))
+
+ def flush_ruler(self):
+ self.outfile.write("Level: %s\nRuler: YES\n" % self.level)
+
+
+ def __del__(self):
+ shutil.copy("bookmarks.tmp", "bookmarks.db")
+ os.unlink("bookmarks.tmp")
+
+
+class Bookmarks2Gadfly(BookmarksParser):
+ def open_outfile(self):
+ import gadfly
+ connection = gadfly.gadfly()
+ connection.startup("bookmarks", ".")
+ self.connection = connection
+
+ cursor = connection.cursor()
+ cursor.execute("""create table bookmarks (
+ rec_no integer,
+ level integer,
+ title varchar,
+ DATA varchar,
+ add_date integer,
+ last_visit integer,
+ last_modified integer,
+ comment varchar
+ )""")
+ self.outfile = cursor
+
+ self.template = """insert into bookmarks
+ (rec_no, level, title, DATA, add_date, last_visit, last_modified, comment)
+ values (?, ?, ?, ?, ?, ?, ?, ?)"""
+
+
+ def __del__(self):
+ self.connection.commit()
+
+
+ def flush_anchor(self):
+ name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
+ self.outfile.execute(self.template,
+ (self.record_no, self.level, name, href,
+ add_date, last_visit, last_modified, comment)
+ )
+
+ def flush_folder(self):
+ name, add_date, comment = self.saved_folder
+ self.outfile.execute(self.template,
+ (self.record_no, self.level, name, "Folder",
+ add_date, '', '', comment)
+ )
+
+ def flush_ruler(self):
+ self.outfile.execute(self.template,
+ (self.record_no, self.level, '', "Ruler",
+ '', '', '', '')
+ )