From 2e82a937f80392639176d9a414b55ffb8164ebca Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Thu, 24 Jul 2003 14:08:19 +0000 Subject: [PATCH] Initial revision git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@2 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- bkmk2db.py | 143 +++++++++++++++++++++ bkmk_parser.py | 321 +++++++++++++++++++++++++++++++++++++++++++++++ check_db.py | 186 +++++++++++++++++++++++++++ check_new.py | 27 ++++ check_old.py | 27 ++++ check_url_sub.py | 145 +++++++++++++++++++++ check_urls2.py | 310 +++++++++++++++++++++++++++++++++++++++++++++ chk_urls.py | 321 +++++++++++++++++++++++++++++++++++++++++++++++ copy_err.py | 24 ++++ db2bkmk.py | 220 ++++++++++++++++++++++++++++++++ koi2win.db | 14 +++ readme | 207 ++++++++++++++++++++++++++++++ 12 files changed, 1945 insertions(+) create mode 100755 bkmk2db.py create mode 100755 bkmk_parser.py create mode 100755 check_db.py create mode 100755 check_new.py create mode 100755 check_old.py create mode 100755 check_url_sub.py create mode 100755 check_urls2.py create mode 100755 chk_urls.py create mode 100755 copy_err.py create mode 100755 db2bkmk.py create mode 100644 koi2win.db create mode 100644 readme diff --git a/bkmk2db.py b/bkmk2db.py new file mode 100755 index 0000000..39ddea6 --- /dev/null +++ b/bkmk2db.py @@ -0,0 +1,143 @@ +#! /usr/local/bin/python -O +""" + Convert Netscape Navigator's bookmarks.html to FLAD database + + Written by BroytMann, Jun 1997 - Mar 1999. 
Copyright (C) 1997-1999 PhiloSoft Design +""" + +import sys, os, stat, string +from getopt import getopt + +import bkmk_parser +from formatter import AbstractFormatter, NullWriter + + +def run(): + optlist, args = getopt(sys.argv[1:], "gits") + + show_pbar = 1 + to_text = 0 + to_gf = 0 + report_stats = 1 + + for _opt, _arg in optlist: + if _opt == '-g': + to_gf = 1 + if _opt == '-i': + show_pbar = 0 + if _opt == '-t': + to_text = 1 + if _opt == '-s': + report_stats = 0 + try: + del _opt, _arg + except NameError: + pass + + if args: + if len(args) > 1: + sys.stderr.write("bkmk2db: too many arguments\n") + sys.exit(1) + + filename = args[0] + + else: + filename = 'bookmarks.html' # good name both for DOS (bookmark.htm) and UNIX + + if show_pbar: + show_pbar = sys.stderr.isatty() + + if show_pbar: + try: + from tty_pbar import ttyProgressBar + except ImportError: + show_pbar = 0 + + if show_pbar: + try: + size = os.stat(filename)[stat.ST_SIZE] + except: + print filename, ": no such file" + sys.exit(1) + + + fmt = AbstractFormatter(NullWriter()) + if to_text: + parser = bkmk_parser.Bookmarks2Text(fmt) + elif to_gf: + parser = bkmk_parser.Bookmarks2Gadfly(fmt) + else: + parser = bkmk_parser.Bookmarks2Flad(fmt) + + + if report_stats: + str = "Converting " + filename + " to " + if to_text: + str = "text" + elif to_gf: + str = "GadFly database" + else: + str = "FLAD database" + + sys.stdout.write("Converting %s to %s: " % (filename, str)) + sys.stdout.flush() + + if show_pbar: + pbar = ttyProgressBar(0, size) + lng = 0 + + # This is for DOS - it counts CRLF, which len() counts as 1 char! 
+ if os.name == 'dos' or os.name == 'nt' : + dos_add = 1 + else: + dos_add = 0 # UNIX' and Mac's len() counts CR or LF correct + + try: + f = open(filename, 'r') + except IOError, msg: + print filename, ":", msg + sys.exit(1) + + header = open("header", 'w') + line_no = 0 + + while 1: + line = f.readline() + if not line: + break + + if show_pbar: + lng = lng + len(line) + dos_add + pbar.display(lng) + + line = string.strip(line) + line_no = line_no + 1 + + try: + parser.feed(line) + + if parser.outfile: # Write header until HTML parser start writing outfile + if header: + header.close() + header = None + else: + header.write(line + '\n') + + except: + break # I need total number of lines; interpreter will print traceback on exit + + if show_pbar: + del pbar + + if report_stats: + print "Ok" + print line_no, "lines proceed" + print parser.urls_no, "urls found" + print parser.record_no, "records created" + + parser.close() + f.close() + + +if __name__ == '__main__': + run() diff --git a/bkmk_parser.py b/bkmk_parser.py new file mode 100755 index 0000000..c3ca1b5 --- /dev/null +++ b/bkmk_parser.py @@ -0,0 +1,321 @@ +""" + Bookmarks parsers + + Written by BroytMann, Mar 1997 - Feb 2000. Copyright (C) 1997-2000 PhiloSoft Design +""" + + +import os, string, shutil +from htmllib import HTMLParser + + +class BookmarksParser(HTMLParser): # Parser for Navigator's bookmarks (abstract class) + def __init__(self, formatter, verbose=0): + HTMLParser.__init__(self, formatter, verbose) + self.urls_no = 0 # cross-reference counter + self.record_no = 1 # record counter + self.outfile = None # output file + self.level = 0 # Indentation level + self.flag_out = 0 # Is it time to flush? 
+ self.saved_data = '' + self.saved_anchor = None + self.saved_folder = None + self.saved_ruler = None + + + def flush(self): + if not self.outfile: + return + + record_flushed = 0 + + if self.saved_anchor: + name, href, add_date, last_visit, last_modified, comment = self.saved_anchor + self.saved_anchor = (name, href, add_date, last_visit, last_modified, comment + self.saved_data) + self.flush_anchor() + self.saved_data = '' + record_flushed = 1 + self.saved_anchor = None + + if self.saved_folder: + name, add_date, comment = self.saved_folder + self.saved_folder = (name, add_date, comment + self.saved_data) + self.flush_folder() + self.saved_data = '' + record_flushed = 1 + self.saved_folder = None + + if self.saved_ruler: + self.flush_ruler() + record_flushed = 1 + self.saved_ruler = None + + if record_flushed: + self.record_no = self.record_no + 1 + + if self.saved_data <> '': # This may occur after ampersand + self.flag_out = 0 + + + + + def close(self): + HTMLParser.close(self) + + if self.outfile: + self.outfile.close() + + if self.level <> 0: + print "Bad HTML:
<DL> and </DL>
mismatch; level=%d" % self.level + + + def handle_data(self, data): + if not self.outfile: + return + + if data and (data[0] == '&'): # Ampersand parsed by SGMLlib + self.flag_out = 0 + + if self.flag_out == 2: # Process comment after
or
+ if self.saved_anchor: + name, href, add_date, last_visit, last_modified, comment = self.saved_anchor + self.saved_anchor = (name, href, add_date, last_visit, last_modified, comment + data) + data = '' # Used + + if self.saved_folder: + name, add_date, comment = self.saved_folder + self.saved_folder = (name, add_date, comment + data) + data = '' # Used + + self.flag_out = 0 + + if self.flag_out == 1: + self.flush() + + if data and (data[0] <> '&') and (self.flag_out == 0): + self.flag_out = 1 # Set flag (to flush data on next call) + + if data: + self.saved_data = self.saved_data + data + + + def anchor_bgn(self, href, add_date, last_visit, last_modified): + self.flush() + self.anchor = (href, add_date, last_visit, last_modified) + + + def anchor_end(self): + if self.anchor: + href, add_date, last_visit, last_modified = self.anchor + self.anchor = None + self.urls_no = self.urls_no + 1 + + self.saved_anchor = (self.saved_data, href, add_date, last_visit, last_modified, '') + self.saved_data = '' # Used + + + def start_a(self, attrs): + href = '' + add_date = '' + last_visit = '' + last_modified = '' + + for attrname, value in attrs: + value = string.strip(value) + if attrname == 'href': + href = value + if attrname == 'add_date': + add_date = value + if attrname == 'last_visit': + last_visit = value + if attrname == 'last_modified': + last_modified = value + + self.anchor_bgn(href, add_date, last_visit, last_modified) + + + def start_h3(self, attrs): # Navigator marks folders with
<H3>
tags + self.flush() + add_date = '' + + for attrname, value in attrs: + value = string.strip(value) + if attrname == 'add_date': + add_date = value + + self.saved_folder = ('', add_date, '') + self.flag_out = 0 + + + def end_h3(self): # End of folder + name, add_date, comment = self.saved_folder + self.saved_folder = (name + self.saved_data, add_date, comment) + self.saved_data = '' # Used + + + def start_dl(self, attrs): + self.flush() + + if not self.outfile: # We are starting output after 1st
tag to skip header + self.open_outfile() + + self.level = self.level + 1 + + + def end_dl(self): + self.flush() + self.level = self.level - 1 + + + def do_dd(self, attrs): + if self.outfile: + self.flag_out = 2 # Set flag to signal "comment starting" + + + def do_br(self, attrs): + if self.outfile: + self.saved_data = self.saved_data + "
<BR>" # Add <BR>
... + self.flag_out = 0 # ...and next line of comment to saved comment + + + def do_hr(self, attrs): + if self.outfile: + self.flush() + self.saved_ruler = 1 + + + def handle_charref(self, name): + if self.outfile: + self.flag_out = 0 + self.saved_data = "%s&%c" % (self.saved_data, chr(name)) + + + def handle_entityref(self, name): + if self.outfile: + self.flag_out = 0 + if self.entitydefs.has_key(name): # If it is one of the standard SGML entities - close it with semicolon + x = ';' + else: + x = '' + self.saved_data = "%s&%s%s" % (self.saved_data, name, x) + + + def open_outfile(self): + self.outfile = open("bookmarks.tmp", 'w') + + +class Bookmarks2Text(BookmarksParser): + def flush_anchor(self): + self.outfile.write(" "*(self.level-1) + str(self.saved_anchor) + '\n') + + + def flush_folder(self): + self.outfile.write(" "*(self.level-1) + str(self.saved_folder) + '\n') + + + def flush_ruler(self): + self.outfile.write(" "*(self.level-1) + "----------\n") + + + def __del__(self): + shutil.copy("bookmarks.tmp", "bookmarks.txt") + os.unlink("bookmarks.tmp") + + +class Bookmarks2Flad(BookmarksParser): + def __init__(self, formatter, verbose=0): + BookmarksParser.__init__(self, formatter, verbose) + self.flush_record = 0 + + + def flush(self): + if not self.outfile: + return + + record_flushed = 0 + + if self.saved_anchor or self.saved_folder or self.saved_ruler or self.saved_data: + if self.flush_record: + self.outfile.write('\n') + else: + self.flush_record = 1 + + BookmarksParser.flush(self) + + + def flush_anchor(self): + name, href, add_date, last_visit, last_modified, comment = self.saved_anchor + self.outfile.write("""Level: %d +Title: %s +URL: %s +AddDate: %s +LastVisit: %s +LastModified: %s +Comment: %s +""" % (self.level, name, href, add_date, last_visit, last_modified, comment)) + + def flush_folder(self): + name, add_date, comment = self.saved_folder + self.outfile.write("""Level: %d +Folder: %s +AddDate: %s +Comment: %s +""" % (self.level, name, 
add_date, comment)) + + def flush_ruler(self): + self.outfile.write("Level: %s\nRuler: YES\n" % self.level) + + + def __del__(self): + shutil.copy("bookmarks.tmp", "bookmarks.db") + os.unlink("bookmarks.tmp") + + +class Bookmarks2Gadfly(BookmarksParser): + def open_outfile(self): + import gadfly + connection = gadfly.gadfly() + connection.startup("bookmarks", ".") + self.connection = connection + + cursor = connection.cursor() + cursor.execute("""create table bookmarks ( + rec_no integer, + level integer, + title varchar, + DATA varchar, + add_date integer, + last_visit integer, + last_modified integer, + comment varchar + )""") + self.outfile = cursor + + self.template = """insert into bookmarks + (rec_no, level, title, DATA, add_date, last_visit, last_modified, comment) + values (?, ?, ?, ?, ?, ?, ?, ?)""" + + + def __del__(self): + self.connection.commit() + + + def flush_anchor(self): + name, href, add_date, last_visit, last_modified, comment = self.saved_anchor + self.outfile.execute(self.template, + (self.record_no, self.level, name, href, + add_date, last_visit, last_modified, comment) + ) + + def flush_folder(self): + name, add_date, comment = self.saved_folder + self.outfile.execute(self.template, + (self.record_no, self.level, name, "Folder", + add_date, '', '', comment) + ) + + def flush_ruler(self): + self.outfile.execute(self.template, + (self.record_no, self.level, '', "Ruler", + '', '', '', '') + ) diff --git a/check_db.py b/check_db.py new file mode 100755 index 0000000..32472e9 --- /dev/null +++ b/check_db.py @@ -0,0 +1,186 @@ +#! /usr/local/bin/python -O +""" + Test FLAD database for: duplicate URLs, too big indent, incorrect record + format, spare keys. + + Written by BroytMann, Jun 1997 - Feb 2000. 
Copyright (C) 1997-2000 PhiloSoft Design +""" + +import sys, string +from getopt import getopt +from copy import _copy_dict + +import fladm + + +def error(err_str): + global errors_found, report_stats + if errors_found == 0: + if report_stats: + print "errors found" + + errors_found = errors_found + 1 + sys.stderr.write("%s\n" % err_str) + + if logfile: + logfile.write("%s\n" % err_str) + + +def check_key(record_no, record, key, allow_empty=1): + if not record.has_key(key): + error("Expected `%s' in record %d -- %s" % (key, record_no, str(record))) + return + + if not allow_empty and not record[key]: + error("Empty key `%s' in record %d -- %s" % (key, record_no, str(record))) + + del record[key] + +def check_date(record_no, record, key): + if not record.has_key(key): + error("Expected `%s' in record %d -- %s" % (key, record_no, str(record))) + else: + try: + _date = string.atoi(record[key]) + except string.atoi_error: + error("Bad `%s' format in record %d -- %s" % (key, record_no, str(record))) + + del record[key] + +def check_empty(record_no, record): + if record <> {}: + error("Spare keys in record %d -- %s" % (record_no, str(record))) + +def check_url(record_no, record): + # I am not testing here check_url("Level") because it is impossible + # to come here without "Level" key - fladm.check_record has to reject + # entire database if there is record without this "must key". + # If someone adds record without "Level" manually - it is serious error + # and the following line raise exception. 
+ del record["Level"] + + check_key(record_no, record, "Title") + check_key(record_no, record, "URL") + check_key(record_no, record, "Comment") + + check_date(record_no, record, "AddDate") + check_date(record_no, record, "LastVisit") + check_date(record_no, record, "LastModified") + + check_empty(record_no, record) + +def check_folder(record_no, record): + # Read comment above - in the beginning of check_url() + del record["Level"] + + check_key(record_no, record, "Folder") + check_key(record_no, record, "Comment") + + check_date(record_no, record, "AddDate") + check_empty(record_no, record) + +def check_ruler(record_no, record): + # Read comment above - in the beginning of check_url() + del record["Level"] + + if not record.has_key("Ruler"): + error("No `Ruler' in record %d -- %s" % (record_no, str(record))) + else: + if record["Ruler"] <> "YES": # Impossible: ruler saying it is not ruler + error("Ruler saying it is not ruler in record %d -- %s" % (record_no, str(record))) + del record["Ruler"] + + check_empty(record_no, record) + + +def run(): + optlist, args = getopt(sys.argv[1:], "l:s") + + global errors_found, report_stats, logfile + report_stats = 1 + + logfile = None + logfname = None + + for _opt, _arg in optlist: + if _opt == '-l': + logfname = _arg + if _opt == '-s': + report_stats = 0 + try: + del _opt, _arg + except NameError: + pass + + if len(args) > 1: + sys.stderr.write("check_db: too many arguments\n") + sys.exit(1) + + + if logfname: + logfile = open(logfname, 'w') + + if report_stats: + sys.stdout.write("Loading: ") + sys.stdout.flush() + + bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"]) + + if report_stats: + print "Ok" + sys.stdout.write("Testing: ") + sys.stdout.flush() + + record_no = 0 + save_level = 1 + got_folder = 1 # Start as if we already have one folder + errors_found = 0 + + URL_d = {} # Create hash table full of URLs + + for record in bookmarks_db: + record_no = record_no + 1 + level = 
string.atoi(record["Level"]) + + if record.has_key("URL"): + if URL_d.has_key(record["URL"]): + error("Duplicate URL (rec. %d, 1st at rec. %d): %s" % (record_no, URL_d[record["URL"]], str(record["URL"]))) + else: + URL_d[record["URL"]] = record_no + + check_url(record_no, _copy_dict(record)) + + elif record.has_key("Folder"): + check_folder(record_no, _copy_dict(record)) + + elif record.has_key("Ruler"): + check_ruler(record_no, _copy_dict(record)) + + else: + raise KeyError, "neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record) + + if got_folder: + if (level > save_level + 1): + error("Indent %d too big (want %d at rec. %d), record: %s" % (level, save_level, record_no, str(record))) + else: + if (level > save_level): + error("Indent %d without folder (rec. %d), record: %s" % (level, record_no, str(record))) + + save_level = level + got_folder = record.has_key("Folder") # Test here to save got_folder for next loop + + # End of loop + + if logfname: + logfile.close() + + if report_stats: + print record_no, "records tested" + if errors_found == 0: + print "Ok (no errors found)" + else: + print "%d errors found" % errors_found + + +if __name__ == '__main__': + run() diff --git a/check_new.py b/check_new.py new file mode 100755 index 0000000..5cd0a64 --- /dev/null +++ b/check_new.py @@ -0,0 +1,27 @@ +#! /usr/local/bin/python -O +""" + Test FLAD database for old records + + Written by BroytMann, Feb 2000. 
Copyright (C) 2000 PhiloSoft Design +""" + + +import fladm +from time import time + +now = time() +thrashold = 2*24*3600 # 2 days + + +def run(): + bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"]) + + for record in bookmarks_db: + if record.has_key("URL"): + add_date = int(record["AddDate"]) + if now - add_date < thrashold: + print "New URL:", record["URL"] + + +if __name__ == '__main__': + run() diff --git a/check_old.py b/check_old.py new file mode 100755 index 0000000..15a0990 --- /dev/null +++ b/check_old.py @@ -0,0 +1,27 @@ +#! /usr/local/bin/python -O +""" + Test FLAD database for old records + + Written by BroytMann, Feb 2000. Copyright (C) 2000 PhiloSoft Design +""" + + +import fladm +from time import time + +now = time() +thrashold = 2*30*24*3600 # 2 months + + +def run(): + bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"]) + + for record in bookmarks_db: + if record.has_key("URL"): + last_visit = int(record["LastVisit"]) + if now - last_visit > thrashold: + print "Old URL:", record["URL"] + + +if __name__ == '__main__': + run() diff --git a/check_url_sub.py b/check_url_sub.py new file mode 100755 index 0000000..6c301df --- /dev/null +++ b/check_url_sub.py @@ -0,0 +1,145 @@ +#! /usr/local/bin/python -O +""" + Check URL - subprocess + + Written by BroytMann, Mar 1999 - Feb 2000. 
Copyright (C) 1999-2000 PhiloSoft Design +""" + + +import sys, os, stat, string, time +import urllib, www_util + +import cPickle +pickle = cPickle +from subproc import RecordFile + +from md5wrapper import md5wrapper + + +ftpcache_key = None +def myftpwrapper(user, passwd, host, port, dirs): + global ftpcache_key + ftpcache_key = (user, host, port, string.joinfields(dirs, '/')) + return _ftpwrapper(user, passwd, host, port, dirs) + +_ftpwrapper = urllib.ftpwrapper +urllib.ftpwrapper = myftpwrapper + +def get_welcome(): + global ftpcache_key + _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome + ftpcache_key = None # I am assuming there are no duplicate ftp URLs in db. + # If there are - ftpcache_key in prev line is invalid. + return _welcome + + +class RedirectException(Exception): + reloc_dict = { + 301: "perm", + 302: "temp" + } + def __init__(self, errcode, newurl): + Exception.__init__(self, "(%s.) to %s" % (self.reloc_dict[errcode], newurl)) + + +class MyURLopener(urllib.URLopener): + # Error 302 -- relocated (temporarily) + def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): + if headers.has_key('location'): + newurl = headers['location'] + elif headers.has_key('uri'): + newurl = headers['uri'] + else: + newurl = "Nowhere" + raise RedirectException(errcode, newurl) + + # Error 301 -- also relocated (permanently) + http_error_301 = http_error_302 + + # Error 401 -- authentication required + def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): + raise IOError, ('http error', errcode, "Authentication required ", headers) + + +def get_error(msg): + if type(msg) == type(""): + return msg + + else: + s = [] + for i in msg: + s.append("'%s'" % string.join(string.split(str(i), "\n"), "\\n")) + return "(%s)" % string.join(s) + +def check_url(record): + try: + now = str(int(time.time())) + url_type, url_rest = urllib.splittype(record["URL"]) + url_host, url_path = urllib.splithost(url_rest) + url_path, url_tag = 
urllib.splittag(url_path) + + tempfname = record["TEMPFILE"] + del record["TEMPFILE"] + + fname, headers = urllib.urlretrieve("%s://%s%s" % (url_type, url_host, url_path), tempfname) + + last_modified = None + record["Size"] = str(os.stat(tempfname)[stat.ST_SIZE]) + + if headers: + try: + last_modified = headers["Last-Modified"] + except KeyError: + last_modified = None + + if last_modified: + last_modified = www_util.parse_time(last_modified) + + if last_modified: + last_modified = str(int(last_modified)) + else: + last_modified = record["LastVisit"] + + record["LastModified"] = last_modified + + md5 = md5wrapper() + if url_type == "ftp": # Pass welcome message through MD5 + md5.update(get_welcome()) + + md5.md5file(tempfname) + record["MD5"] = str(md5) + + except IOError, msg: + if (msg[0] == "http error") and (msg[1] == -1): + record["NoError"] = "The server did not return any header - it is not an error, actually" + else: + record["Error"] = get_error(msg) + + except EOFError: + record["Error"] = "Unexpected EOF (FTP server closed connection)" + + except RedirectException, msg: + record["Moved"] = str(msg) + + # Mark this even in case of error + record["LastTested"] = now + + +def run(): + urllib._urlopener = MyURLopener() + + # Some sites allow only Mozilla-compatible browsers; way to stop robots? + server_version = "Mozilla/3.0 (compatible; Python-urllib/%s)" % urllib.__version__ + urllib._urlopener.addheaders[0] = ('User-agent', server_version) + + rec_in = RecordFile(sys.stdin) + rec_out = RecordFile(sys.stdout) + + while 1: + record = pickle.loads(rec_in.read_record()) + check_url(record) + rec_out.write_record(pickle.dumps(record)) + + +if __name__ == '__main__': + run() diff --git a/check_urls2.py b/check_urls2.py new file mode 100755 index 0000000..73a91f9 --- /dev/null +++ b/check_urls2.py @@ -0,0 +1,310 @@ +#! 
/usr/local/bin/python -O +""" + For every URL in the FLAD database get info from the Net + and store info in check.db + + Version 2.0 + Written by BroytMann, Aug 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design +""" + + +import sys, os, stat, string, time +from getopt import getopt + +import urllib, tempfile +from copy import _copy_dict + +import cPickle +pickle = cPickle + +import fladm, fladc, shutil +from flog import makelog, openlog + + +os.environ["PATH"] = ".:" + os.environ["PATH"] +from subproc import Subprocess, RecordFile + + +def set_checkpoint(rec_no): + cpfile = open("check.dat", 'w') + cpfile.write("# chk_urls checkpoint file\n") + cpfile.write("Size: %d\n" % db_stat[stat.ST_SIZE]) + cpfile.write("MTime: %d\n" % db_stat[stat.ST_MTIME]) + cpfile.write("Record: %d" % rec_no) + cpfile.close() + +def get_checkpoint(): + try: + cpfile = fladc.load_file("check.dat") + if (string.atoi(cpfile["Size"]) <> db_stat[stat.ST_SIZE]) or \ + (string.atoi(cpfile["MTime"]) <> db_stat[stat.ST_MTIME]): + return -3 + + return string.atoi(cpfile["Record"]) + + except IOError: # No such file + return -1 + + except KeyError: # No such key in checkpoint file + return -2 + + except string.atoi_error: # Wrong numeric format + return -2 + + return 0 + +def start(db_name, report_stats): + start_recno = get_checkpoint() + if start_recno < 0: + if start_recno == -1: + log = makelog("check.log") + log("chk_urls started") + if report_stats: + print " chk_urls: normal start" + + elif start_recno == -2: + log = openlog("check.log") + log("chk_urls started") + log(" invalid checkpoint file, checkpoint ignored") + if report_stats: + print " chk_urls: invalid checkpoint file, checkpoint ignored" + + elif start_recno == -3: + log = makelog("check.log") + log("chk_urls started") + log(" bookmarks.db changed, checkpoint ignored") + if report_stats: + print " chk_urls: bookmarks.db changed, checkpoint ignored" + + else: + raise RuntimeError, "wrong get_checkpoint() return: `%s'" % 
str(start_recno) + + start_recno = 0 + + elif start_recno == 0: + raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno) + + else: # start_recno > 0 + if os.path.exists("check.db"): + if not os.path.exists("check.old"): + shutil.copy("check.db", "check.old") + db_name = "check.db" + + log = openlog("check.log") + log("chk_urls started") + log(" found valid checkpoint file, continue") + if report_stats: + print " chk_urls: found valid checkpoint file, continue" + + else: + log = makelog("check.log") + log("chk_urls started") + log(" valid checkpoint, but no check.db file, restarting") + if report_stats: + print " chk_urls: valid checkpoint, but no check.db file, restarting" + start_recno = 0 + + return start_recno, db_name, log + + +tempfname = "check_urls" + tempfile.gettempprefix() + ".tmp" + + +check_subp = None +subp_pipe = None + +def restart_subp(log, report_stats): + global check_subp, subp_pipe + if check_subp: + log(" restarting hanging subprocess") + if report_stats: + print " chk_urls: restarting hanging subprocess" + del check_subp + del subp_pipe + + check_subp = Subprocess("check_url_sub.py") + subp_pipe = RecordFile(check_subp) + + +def check_url(record, log, report_stats): + try: + record["TEMPFILE"] = tempfname + subp_pipe.write_record(pickle.dumps(record)) + + if check_subp.waitForPendingChar(900): # wait 15 minutes + rec = pickle.loads(subp_pipe.read_record()) + del record["TEMPFILE"] + for key in rec.keys(): + record[key] = rec[key] + else: + restart_subp(log, report_stats) + del record["TEMPFILE"] + record["Error"] = "Subprocess connection timed out" + + except KeyboardInterrupt: + return 0 + + return 1 + + +def run(): + optlist, args = getopt(sys.argv[1:], "ise") + + show_pbar = 1 + report_stats = 1 + only_errors = 0 + db_name = "bookmarks.db" + + for _opt, _arg in optlist: + if _opt == '-i': + show_pbar = 0 + if _opt == '-s': + report_stats = 0 + if _opt == '-e': + only_errors = 1 + try: + del _opt, _arg + except 
NameError: + pass + + if report_stats: + print "BroytMann check_urls, Copyright (C) 1997-1999 PhiloSoft Design" + + if args: + if len(args) > 1: + sys.stderr.write("chk_urls: too many arguments\n") + sys.exit(1) + else: + db_name = args[0] + + if show_pbar: + show_pbar = sys.stderr.isatty() + + if show_pbar: + try: + from tty_pbar import ttyProgressBar + except ImportError: + show_pbar = 0 + + global db_stat, log + db_stat = os.stat(db_name) + + if only_errors: + start_recno = 0 + db_name = "check.db" + log = openlog("check.log") + log("chk_urls restarted for errors") + else: + start_recno, db_name, log = start(db_name, report_stats) + + if report_stats: + sys.stdout.write("Loading %s: " % db_name) + sys.stdout.flush() + + bookmarks_db = fladm.load_from_file(db_name, fladm.check_record, ["Level"]) + bookmarks_dbstore = bookmarks_db + + if only_errors: + bookmarks_db = filter(lambda r: r.has_key("Error") and r["Error"][:5] <> "Moved", bookmarks_db) + + if report_stats: + print "Ok" + + db_len = len(bookmarks_db) + if db_len == 0: + print "Database empty" + sys.exit(0) + + if start_recno >= db_len: + _s = "start_recno (%d) >= db_len (%d), restarting" % (start_recno, db_len) + log(" " + _s) + if report_stats: + print " chk_urls: " + _s + del _s + start_recno = 0 + + if report_stats: + if only_errors: + s = "Rechecking errors: " + else: + s = "Checking: " + sys.stdout.write(s) + sys.stdout.flush() + + if show_pbar: + save_stats = report_stats + report_stats = 0 + pbar = ttyProgressBar(0, db_len) + + urls_no = 0 + record_count = 0 + start_time = time.time() + + rcode = 1 + restart_subp(log, report_stats) # Not restart, just start afresh + checked_dict = {} # Dictionary of checked URLs, mapped to records number + + for record_no in range(start_recno, db_len): + if show_pbar: + pbar.display(record_no+1) + + record = bookmarks_db[record_no] + record_count = record_count + 1 + + if only_errors: + del record["Error"] + + if record.has_key("URL"): + url = record["URL"] + if 
checked_dict.has_key(url): + log("Already checked %s" % url) + level = record["Level"] + comment = record["Comment"] + bookmarks_db[record_no] = _copy_dict(bookmarks_db[checked_dict[url]]) + bookmarks_db[record_no]["Level"] = level + bookmarks_db[record_no]["Comment"] = comment + else: + log("Checking %s" % url) + rcode = check_url(record, log, report_stats) + if rcode: + current_time = time.time() + if current_time - start_time >= 300: # Save checkpoint and database every 5 min + bookmarks_dbstore.store_to_file("check.db") + set_checkpoint(record_no) + log.flush() + start_time = current_time + urls_no = urls_no + 1 + checked_dict[url] = record_no + else: + log("Interrupted by user (^C)") + break + + if show_pbar: + del pbar + report_stats = save_stats + + if report_stats: + print "Ok" + print record_count, "records checked" + print urls_no, "URLs checked" + + bookmarks_dbstore.store_to_file("check.db") + + if rcode: + log("chk_urls finished ok") + log.close() + + urllib.urlcleanup() + if os.path.exists(tempfname): + os.unlink(tempfname) + + if rcode: + if os.path.exists("check.dat"): + os.unlink("check.dat") + else: + set_checkpoint(record_no) + sys.exit(1) + + +if __name__ == '__main__': + run() diff --git a/chk_urls.py b/chk_urls.py new file mode 100755 index 0000000..8bc3ddd --- /dev/null +++ b/chk_urls.py @@ -0,0 +1,321 @@ +#! /usr/local/bin/python -O +""" + For every URL in the FLAD database get info from the Net + and store info in check.db + + Written by BroytMann, Aug-Oct 1997. 
Copyright (C) 1997 PhiloSoft Design +""" + +import sys, os, string, stat, shutil, time +from getopt import getopt +import tempfile + +import urllib +from urllib import URLopener, splittype + +from md5wrapper import md5wrapper +from flog import makelog, openlog +import fladm, fladc, www_util + + +# Shortcut for basic usage +_urlopener = None + +def urlopen(url): + global _urlopener + if not _urlopener: + _urlopener = URLopener() + return _urlopener.open(url) + +def urlretrieve(url, filename=None): + global _urlopener + if not _urlopener: + _urlopener = URLopener() + if filename: + return _urlopener.retrieve(url, filename) + else: + return _urlopener.retrieve(url) + +def urlcleanup(): + if _urlopener: + _urlopener.cleanup() + + +_key = None + +def myftpwrapper(user, passwd, host, port, dirs): + global _key + _key = (user, host, port, string.joinfields(dirs, '/')) + return _ftpwrapper(user, passwd, host, port, dirs) + +_ftpwrapper = urllib.ftpwrapper +urllib.ftpwrapper = myftpwrapper + +def get_welcome(): + global _key + _welcome = _urlopener.ftpcache[_key].ftp.welcome + _key = None # I am assuming there are no duplicate ftp URLs in db. 
If there are - _key in prev line is invalid + return _welcome + + +def set_checkpoint(rec_no): + cpfile = open("check.dat", 'w') + cpfile.write("# chk_urls checkpoint file\n") + cpfile.write("Size: %d\n" % db_stat[stat.ST_SIZE]) + cpfile.write("MTime: %d\n" % db_stat[stat.ST_MTIME]) + cpfile.write("Record: %d" % rec_no) + cpfile.close() + +def get_checkpoint(): + try: + cpfile = fladc.load_file("check.dat") + if (string.atoi(cpfile["Size"]) <> db_stat[stat.ST_SIZE]) or \ + (string.atoi(cpfile["MTime"]) <> db_stat[stat.ST_MTIME]): + return -3 + + return string.atoi(cpfile["Record"]) + + except IOError: # No such file + return -1 + + except KeyError: # No such key in checkpoint file + return -2 + + except string.atoi_error: # Wrong numeric format + return -2 + + return 0 + + +tempfname = tempfile.gettempprefix() + "check.tmp" + + +def get_error(msg): + if type(msg) == type(""): + return msg + + else: + s = "" + for i in msg: + if s <> "": + s = s + ", " + x = string.join(string.split(str(i), "\n"), "\\n") + s = s + "'%s'" % x + return "(" + s + ")" + +def check_url(record, url_type, url_rest): + + now = str(int(time.time())) + + try: + fname, headers = urlretrieve(url_type + ':' + url_rest, tempfname) + + last_modified = None + + record["Size"] = str(os.stat(tempfname)[stat.ST_SIZE]) + + if headers: + try: + last_modified = headers["Last-Modified"] + except KeyError: + last_modified = None + + if last_modified: + last_modified = www_util.parse_time(last_modified) + + if last_modified: + last_modified = str(int(last_modified)) + else: + last_modified = record["LastVisit"] + + record["LastModified"] = last_modified + + md5 = md5wrapper() + if url_type == "ftp": # Pass welcome message through MD5 + md5.update(get_welcome()) + + md5.md5file(tempfname) + record["MD5"] = str(md5) + + except IOError, msg: + record["Error"] = get_error(msg) + + except EOFError: + record["Error"] = "Unexpected EOF (FTP server closed connection)" + + except KeyboardInterrupt: + return 0 + + # 
Mark this even in case of error + record["LastTested"] = now + + return 1 + + +def run(): + optlist, args = getopt(sys.argv[1:], "is") + + show_pbar = 1 + report_stats = 1 + db_name = "bookmarks.db" + + for _opt, _arg in optlist: + if _opt == '-i': + show_pbar = 0 + if _opt == '-s': + report_stats = 0 + try: + del _opt, _arg + except NameError: + pass + + if report_stats: + print "BroytMann chk_urls, Copyright (C) 1997-1998 PhiloSoft Design" + + if args: + sys.stderr.write("chk_urls: too many arguments\n") + sys.exit(1) + + if show_pbar: + show_pbar = sys.stderr.isatty() + + if show_pbar: + try: + from tty_pbar import ttyProgressBar + except ImportError: + show_pbar = 0 + + global db_stat, log + db_stat = os.stat("bookmarks.db") + + start_recno = get_checkpoint() + if start_recno < 0: + if start_recno == -1: + log = makelog("check.log") + log("chk_urls started") + if report_stats: + print " chk_urls: normal start" + + elif start_recno == -2: + log = openlog("check.log") + log("chk_urls started") + log(" invalid checkpoint file, checkpoint ignored") + if report_stats: + print " chk_urls: invalid checkpoint file, checkpoint ignored" + + elif start_recno == -3: + log = makelog("check.log") + log("chk_urls started") + log(" bookmarks.db changed, checkpoint ignored") + if report_stats: + print " chk_urls: bookmarks.db changed, checkpoint ignored" + + else: + raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno) + + start_recno = 0 + + elif start_recno == 0: + raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno) + + else: # start_recno > 0 + if os.path.exists("check.db"): + if not os.path.exists("check.old"): + shutil.copy("check.db", "check.old") + db_name = "check.db" + + log = openlog("check.log") + log("chk_urls started") + log(" found valid checkpoint file, continue") + if report_stats: + print " chk_urls: found valid checkpoint file, continue" + + else: + log = makelog("check.log") + log("chk_urls started") + log(" 
valid checkpoint, but no check.db file, restarting") + if report_stats: + print " chk_urls: valid checkpoint, but no check.db file, restarting" + start_recno = 0 + + if report_stats: + sys.stdout.write("Loading %s: " % db_name) + sys.stdout.flush() + + bookmarks_db = fladm.load_from_file(db_name, fladm.check_record, ["Level"]) + db_len = len(bookmarks_db) + + if report_stats: + print "Ok" + + if start_recno >= db_len: + _s = "start_recno (%d) >= db_len (%d), restarting" % (start_recno, db_len) + log(" " + _s) + if report_stats: + print " chk_urls: " + _s + del _s + start_recno = 0 + + if report_stats: + sys.stdout.write("Checking: ") + sys.stdout.flush() + + if show_pbar: + pbar = ttyProgressBar(0, db_len) + + urls_no = 0 + record_count = 0 + start_time = time.time() + + rcode = 1 + for record_no in range(start_recno, db_len): + if show_pbar: + pbar.display(record_no+1) + + record = bookmarks_db[record_no] + record_count = record_count + 1 + + if record.has_key("URL"): + url_type, url_rest = splittype(record["URL"]) + log("Checking %s:%s" % (url_type, url_rest)) + rcode = check_url(record, url_type, url_rest) + if rcode: + current_time = time.time() + if current_time - start_time >= 300: # Save checkpoint and database every 5 min + bookmarks_db.store_to_file("check.db") + set_checkpoint(record_no) + log.flush() + start_time = current_time + urls_no = urls_no + 1 + else: + log("Interrupted by user (^C)") + break + + if show_pbar: + del pbar + + if report_stats: + print "Ok" + print record_count, "records checked" + print urls_no, "URLs checked" + + bookmarks_db.store_to_file("check.db") + + if rcode: + log("chk_urls finished ok") + log.close() + + urlcleanup() + if os.path.exists(tempfname): + os.unlink(tempfname) + + if rcode: + if os.path.exists("check.dat"): + os.unlink("check.dat") + else: + set_checkpoint(record_no) + sys.exit(1) + + +if __name__ == '__main__': + run() diff --git a/copy_err.py b/copy_err.py new file mode 100755 index 0000000..4aa6e35 --- 
/dev/null +++ b/copy_err.py @@ -0,0 +1,24 @@ +#! /usr/local/bin/python -O +""" + Test FLAD database for old records + + Written by BroytMann, Feb 2000. Copyright (C) 2000 PhiloSoft Design +""" + + +import fladm + + +def run(): + bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"]) + errors = fladm.Flad_WithMustKeys(fladm.check_record, ["Level"]) + + for record in bookmarks_db: + if record.has_key("Error"): + errors.append(record) + + errors.store_to_file("errors.db") + + +if __name__ == '__main__': + run() diff --git a/db2bkmk.py b/db2bkmk.py new file mode 100755 index 0000000..cc2bb82 --- /dev/null +++ b/db2bkmk.py @@ -0,0 +1,220 @@ +#! /usr/local/bin/python -O +""" + Convert FLAD database back to bookmarks.html suitable for Netscape Navigator + + Written by BroytMann, Jun 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design +""" + +import sys, os, string, shutil +from getopt import getopt + +import fladm + + +def write(str): + if private_level == 0: # Put in public all except private folder + public_html.write(str) + private_html.write(str) + + +def unindent(old_level, new_level): + while old_level > new_level: + old_level = old_level - 1 + write(" "*old_level + "

\n") + + +def gen_html(bookmarks_db, show_pbar, report_stats): + global pbar, record_no, urls_no, public_html, private_html, private_level + + shutil.copy("header", "public.html") + shutil.copy("header", "private.html") + + public_html = open("public.html", 'a') + private_html = open("private.html", 'a') + + record_no = 0 + urls_no = 0 + + save_level = 0 + got_folder = 1 # Start as if we already have one folder + private_level = 0 + + for record in bookmarks_db: + record_no = record_no + 1 + + if show_pbar: + pbar.display(record_no) + + level = string.atoi(record["Level"]) + + if level == save_level: + pass + elif level == save_level + 1: + if got_folder: + write(" "*(level - 1) + "

\n") + else: + raise ValueError, "indent without folder" + elif level <= save_level - 1: + unindent(save_level, level) + else: + raise ValueError, "new level (%d) too big; must be %d - %d" % (level, save_level-1, save_level+1) + + save_level = level + got_folder = record.has_key("Folder") # Test here to save got_folder for next loop + + if private_level == save_level: + private_level = 0 # We've returned to saved private level - private folder is over + + if record.has_key("URL"): + write(" "*level + '

%s\n' % (record["URL"], record["AddDate"], record["LastVisit"], record["LastModified"], record["Title"])) + urls_no = urls_no + 1 + + elif record.has_key("Folder"): + # Dirty hacks here + if (record["Folder"] == "Private links") and (private_level == 0): + private_level = save_level # We found private folder - save its level + + if record["Folder"] == "All the rest - Unclassified": + write(" "*level + '

%s

\n' % (record["AddDate"], record["Folder"])) + else: + write(" "*level + '

%s

\n' % (record["AddDate"], record["Folder"])) + + elif record.has_key("Ruler"): + write(" "*level + "
\n") + + else: + raise KeyError, "neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record) + + if record.has_key("Comment") and (record["Comment"] <> ''): + write("
%s\n" % string.join(string.split(record["Comment"], "
"), "
\n")) + + + if save_level >= 0: + unindent(save_level, 0) + else: + raise ValueError, "new level (%d) too little - must be >= 0" % save_level + + public_html.close() + private_html.close() + + if show_pbar: + del pbar + + if report_stats: + print "Ok" + + +def translate(bookmarks_db, transldb_name, transl, show_pbar, report_stats): + global pbar, record_no, urls_no, public_html, private_html, private_level + + new_ext = str(transl) + os.rename("public.html", "public." + new_ext) + os.rename("private.html", "private." + new_ext) + + transl_d = {} + transl_db = fladm.load_from_file(transldb_name, fladm.check_record, ["URL1", "URL2"], [""]) + # This prevents any other key to appear in transl.db ^ + + # Generate translation dictionary (hash table) + if transl == 1: + for record in transl_db: + transl_d[record["URL1"]] = record["URL2"] + elif transl == 2: + for record in transl_db: + transl_d[record["URL2"]] = record["URL1"] + else: + raise ValueError, "transl (%d) must be 1 or 2" % transl + + del transl_db # Save few bytes of memory + transl_k = transl_d.keys() + + # Translate URLs + for record in bookmarks_db: + if record.has_key("URL") and (record["URL"] in transl_k): + record["URL"] = transl_d[record["URL"]] + + gen_html(bookmarks_db, show_pbar, report_stats) + + new_ext = str(3 - transl) # Translate 1 to 2, or 2 to 1 + os.rename("public.html", "public." + new_ext) + os.rename("private.html", "private." 
+ new_ext) + + +def run(): + global pbar, record_no, urls_no, public_html, private_html, private_level + + optlist, args = getopt(sys.argv[1:], "ist:r") + + show_pbar = 1 + report_stats = 1 + + transldb_name = "" # dictionary translation; default is no translation + transl = 0 + + for _opt, _arg in optlist: + if _opt == '-i': + show_pbar = 0 + if _opt == '-s': + report_stats = 0 + if _opt == '-t': + transldb_name = _arg + transl = 1 + if _opt == '-r': + transl = 2 + try: + del _opt, _arg + except NameError: + pass + + if args: + sys.stderr.write("db2bkmk: too many arguments\n") + sys.exit(1) + + if show_pbar: + show_pbar = sys.stderr.isatty() + + if show_pbar: + try: + from tty_pbar import ttyProgressBar + except ImportError: + show_pbar = 0 + + if report_stats: + sys.stdout.write("Loading: ") + sys.stdout.flush() + + bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"]) + + if report_stats: + print "Ok" + sys.stdout.write("Converting FLAD database to bookmarks.html: ") + sys.stdout.flush() + + if show_pbar: + pbar = ttyProgressBar(0, len(bookmarks_db)) + + gen_html(bookmarks_db, show_pbar, report_stats) + + if transl: + if report_stats: + sys.stdout.write("Translating: ") + sys.stdout.flush() + + if report_stats and show_pbar: # Display bar only without "-i"; + # with "-s" skip it (one bar already + # displayed, and it is enough) + pbar = ttyProgressBar(0, len(bookmarks_db)) + + else: + show_pbar = 0 + + translate(bookmarks_db, transldb_name, transl, show_pbar, report_stats) + + + if report_stats: + print record_no, "records proceed" + print urls_no, "urls created" + + +if __name__ == '__main__': + run() diff --git a/koi2win.db b/koi2win.db new file mode 100644 index 0000000..75433b8 --- /dev/null +++ b/koi2win.db @@ -0,0 +1,14 @@ +URL1: http://www.xland.ru:8088/tel_koi/owa/tel.intro +URL2: http://www.xland.ru:8088/tel_win/owa/tel.intro + +URL1: http://meteo.infospace.ru/koi/moscow/html/r_index.htm +URL2: 
http://meteo.infospace.ru/win/moscow/html/r_index.htm + +URL1: http://meteo.infospace.ru/koi/wcond/html/r_index.ssi +URL2: http://meteo.infospace.ru/win/wcond/html/r_index.ssi + +URL1: http://koi.dzik.aha.ru/ +URL2: http://www.dzik.aha.ru/ + +URL1: http://www-psb.ad-sbras.nsc.ru/kruglk.htm +URL2: http://www-psb.ad-sbras.nsc.ru/kruglw.htm diff --git a/readme b/readme new file mode 100644 index 0000000..13d197f --- /dev/null +++ b/readme @@ -0,0 +1,207 @@ + + BOOKMARKS database and internet robot + + Here is a set of classes, libraries and programs I use to manipulate my +bookmarks.html. I like Netscape Navigator, but I need more features, so I am +writing these programs for my needs. I need to extend Navigator's "What's new" +feature (Navigator 4 named it "Update bookmarks"). + + These programs are intended to run as follows. +1. bkmk2db converts bookmarks.html to bookmarks.db. +2. chk_urls (Internet robot) runs against bookmarks.db, checks every URL and + saves results in check.db. +3. db2bkmk converts bookmarks.db back to bookmarks.html. + Then I use this bookmarks file and... +4. bkmk2db converts bookmarks.html to bookmarks.db. +5. chk_urls (Internet robot) runs against bookmarks.db, checks every URL and + saves results in check.db (old file copied to check.old). +6. (An as yet unnamed program) will compare check.old with check.db and generate +a detailed report. For example: + this URL is unchanged + this URL is changed + this URL is unavailable due to: host not found... + + Bookmarks database programs are almost debugged. What needs to be done is +support for aliases. Second version of the internet robot is finished. + + Although not required, these programs work fine with tty_pbar.py (my little +module for creating text-mode progress bars). + +COPYRIGHT and LEGAL ISSUES + All programs are copyrighted by Oleg Broytmann and PhiloSoft Design. All +sources are protected by the GNU GPL. Programs are provided "as-is", without any kind +of warranty. All usual blah-blah-blah.
+ + #include + + +------------------------------ bkmk2db ------------------------------ + NAME + bkmk2db.py - script to convert bookmarks.html to FLAD database. + + SYNOPSIS + bkmk2db.py [-its] [/path/to/bookmarks.html] + + DESCRIPTION + bkmk2db.py splits given file (or ./bookmarks.html) into FLAD database + bookmarks.db in current directory. + + Options: + -i + Inhibit progress bar. Default is to display progress bar if + stderr.isatty() + + -t + Convert to text file (for debugging). Default is to convert to + FLAD. + + -s + Suppress output of statistics at the end of the program. Default + is to write how many lines the program read and how many URLs + parsed. Also suppress some messages during run. + + BUGS + The program starts working by writing lines to the header file until + BookmarksParser initializes its own output file (this occurs when the + parser encounters the 1st
tag). This is a design flaw. + + Empty comments (no text after
) are not marked specially in + database, so db2bkmk.py will not reconstruct them. I don't need empty +
s, so I consider it a feature, not a real bug. + + Aliases are not supported (yet). + + +------------------------------ db2bkmk ------------------------------ + NAME + db2bkmk.py - script to reconstruct bookmarks.html back from FLAD + database. + + SYNOPSIS + db2bkmk.py [-is] [-t dict.db [-r]] + + DESCRIPTION + db2bkmk.py reads bookmarks.db and creates two HTML files - + public.html and private.html. The latter is just the full + bookmarks.html, while the former file hides the private folder. + + Options: + -i + Inhibit progress bar. Default is to display progress bar if + stderr.isatty() + + -s + Suppress output of statistics at the end of the program. Default is + to write how many records the program processed and how many URLs + were created. Also suppress some messages during run. + + -t dict.db + For most tasks, if someone needs to process bookmarks.db in a + regular way (for example, replace all "gopher://gopher." with + "http://www."), it is easy to write a special program, processing + every DB record. For some tasks it is even simpler and faster to + write sed/awk scripts. But there are cases when someone needs to + process bookmarks.db in a non-regular way: one URL must be changed + in one way, another URL - in a second way, etc. The -t option allows + using an external dictionary for such translation. The dictionary + itself is again a FLAD database, where every record has two keys - + URL1 and URL2. With -t option in effect, db2bkmk generates + {private,public}.html, renames them to {private,public}.1, and + then translates the entire bookmarks.db again, generating + {private,public}.2 (4 files in total), where every URL1 is replaced + with URL2 from the dictionary. (See koi2win.db for an example of a + translation dictionary) + + -r + Reverse the effect of -t option - translate from URL2 to URL1. + + BUGS + There are three hacks under the line marked with "Dirty hacks here": + 1. if record["Folder"] == "Private links": + This is to hide passwords from my bookmarks file. + + 2.
if record["Folder"] == "All the rest - Unclassified": + outfile.write(" "*level + "