--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Convert Netscape Navigator's bookmarks.html to FLAD database
+
+ Written by BroytMann, Jun 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design
+"""
+
+import sys, os, stat, string
+from getopt import getopt
+
+import bkmk_parser
+from formatter import AbstractFormatter, NullWriter
+
+
+def run():
+ optlist, args = getopt(sys.argv[1:], "gits")
+
+ show_pbar = 1
+ to_text = 0
+ to_gf = 0
+ report_stats = 1
+
+ for _opt, _arg in optlist:
+ if _opt == '-g':
+ to_gf = 1
+ if _opt == '-i':
+ show_pbar = 0
+ if _opt == '-t':
+ to_text = 1
+ if _opt == '-s':
+ report_stats = 0
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
+
+ if args:
+ if len(args) > 1:
+ sys.stderr.write("bkmk2db: too many arguments\n")
+ sys.exit(1)
+
+ filename = args[0]
+
+ else:
+ filename = 'bookmarks.html' # good name both for DOS (bookmark.htm) and UNIX
+
+ if show_pbar:
+ show_pbar = sys.stderr.isatty()
+
+ if show_pbar:
+ try:
+ from tty_pbar import ttyProgressBar
+ except ImportError:
+ show_pbar = 0
+
+ if show_pbar:
+ try:
+ size = os.stat(filename)[stat.ST_SIZE]
+ except:
+ print filename, ": no such file"
+ sys.exit(1)
+
+
+ fmt = AbstractFormatter(NullWriter())
+ if to_text:
+ parser = bkmk_parser.Bookmarks2Text(fmt)
+ elif to_gf:
+ parser = bkmk_parser.Bookmarks2Gadfly(fmt)
+ else:
+ parser = bkmk_parser.Bookmarks2Flad(fmt)
+
+
+ if report_stats:
+ str = "Converting " + filename + " to "
+ if to_text:
+ str = "text"
+ elif to_gf:
+ str = "GadFly database"
+ else:
+ str = "FLAD database"
+
+ sys.stdout.write("Converting %s to %s: " % (filename, str))
+ sys.stdout.flush()
+
+ if show_pbar:
+ pbar = ttyProgressBar(0, size)
+ lng = 0
+
+ # This is for DOS - it counts CRLF, which len() counts as 1 char!
+ if os.name == 'dos' or os.name == 'nt' :
+ dos_add = 1
+ else:
+ dos_add = 0 # UNIX' and Mac's len() counts CR or LF correct
+
+ try:
+ f = open(filename, 'r')
+ except IOError, msg:
+ print filename, ":", msg
+ sys.exit(1)
+
+ header = open("header", 'w')
+ line_no = 0
+
+ while 1:
+ line = f.readline()
+ if not line:
+ break
+
+ if show_pbar:
+ lng = lng + len(line) + dos_add
+ pbar.display(lng)
+
+ line = string.strip(line)
+ line_no = line_no + 1
+
+ try:
+ parser.feed(line)
+
+ if parser.outfile: # Write header until HTML parser start writing outfile
+ if header:
+ header.close()
+ header = None
+ else:
+ header.write(line + '\n')
+
+ except:
+ break # I need total number of lines; interpreter will print traceback on exit
+
+ if show_pbar:
+ del pbar
+
+ if report_stats:
+ print "Ok"
+ print line_no, "lines proceed"
+ print parser.urls_no, "urls found"
+ print parser.record_no, "records created"
+
+ parser.close()
+ f.close()
+
+
+if __name__ == '__main__':
+ run()
--- /dev/null
+"""
+ Bookmarks parsers
+
+ Written by BroytMann, Mar 1997 - Feb 2000. Copyright (C) 1997-2000 PhiloSoft Design
+"""
+
+
+import os, string, shutil
+from htmllib import HTMLParser
+
+
+class BookmarksParser(HTMLParser): # Parser for Navigator's bookmarks (abstract class)
+ def __init__(self, formatter, verbose=0):
+ HTMLParser.__init__(self, formatter, verbose)
+ self.urls_no = 0 # cross-reference counter
+ self.record_no = 1 # record counter
+ self.outfile = None # output file
+ self.level = 0 # Indentation level
+ self.flag_out = 0 # Is it time to flush?
+ self.saved_data = ''
+ self.saved_anchor = None
+ self.saved_folder = None
+ self.saved_ruler = None
+
+
+ def flush(self):
+ if not self.outfile:
+ return
+
+ record_flushed = 0
+
+ if self.saved_anchor:
+ name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
+ self.saved_anchor = (name, href, add_date, last_visit, last_modified, comment + self.saved_data)
+ self.flush_anchor()
+ self.saved_data = ''
+ record_flushed = 1
+ self.saved_anchor = None
+
+ if self.saved_folder:
+ name, add_date, comment = self.saved_folder
+ self.saved_folder = (name, add_date, comment + self.saved_data)
+ self.flush_folder()
+ self.saved_data = ''
+ record_flushed = 1
+ self.saved_folder = None
+
+ if self.saved_ruler:
+ self.flush_ruler()
+ record_flushed = 1
+ self.saved_ruler = None
+
+ if record_flushed:
+ self.record_no = self.record_no + 1
+
+ if self.saved_data <> '': # This may occur after ampersand
+ self.flag_out = 0
+
+
+
+
+ def close(self):
+ HTMLParser.close(self)
+
+ if self.outfile:
+ self.outfile.close()
+
+ if self.level <> 0:
+ print "Bad HTML: <DL> and </DL> mismatch; level=%d" % self.level
+
+
+ def handle_data(self, data):
+ if not self.outfile:
+ return
+
+ if data and (data[0] == '&'): # Ampersand parsed by SGMLlib
+ self.flag_out = 0
+
+ if self.flag_out == 2: # Process comment after <DD> or <HR>
+ if self.saved_anchor:
+ name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
+ self.saved_anchor = (name, href, add_date, last_visit, last_modified, comment + data)
+ data = '' # Used
+
+ if self.saved_folder:
+ name, add_date, comment = self.saved_folder
+ self.saved_folder = (name, add_date, comment + data)
+ data = '' # Used
+
+ self.flag_out = 0
+
+ if self.flag_out == 1:
+ self.flush()
+
+ if data and (data[0] <> '&') and (self.flag_out == 0):
+ self.flag_out = 1 # Set flag (to flush data on next call)
+
+ if data:
+ self.saved_data = self.saved_data + data
+
+
+ def anchor_bgn(self, href, add_date, last_visit, last_modified):
+ self.flush()
+ self.anchor = (href, add_date, last_visit, last_modified)
+
+
+ def anchor_end(self):
+ if self.anchor:
+ href, add_date, last_visit, last_modified = self.anchor
+ self.anchor = None
+ self.urls_no = self.urls_no + 1
+
+ self.saved_anchor = (self.saved_data, href, add_date, last_visit, last_modified, '')
+ self.saved_data = '' # Used
+
+
+ def start_a(self, attrs):
+ href = ''
+ add_date = ''
+ last_visit = ''
+ last_modified = ''
+
+ for attrname, value in attrs:
+ value = string.strip(value)
+ if attrname == 'href':
+ href = value
+ if attrname == 'add_date':
+ add_date = value
+ if attrname == 'last_visit':
+ last_visit = value
+ if attrname == 'last_modified':
+ last_modified = value
+
+ self.anchor_bgn(href, add_date, last_visit, last_modified)
+
+
+ def start_h3(self, attrs): # Navigator marks folders with <H3> tags
+ self.flush()
+ add_date = ''
+
+ for attrname, value in attrs:
+ value = string.strip(value)
+ if attrname == 'add_date':
+ add_date = value
+
+ self.saved_folder = ('', add_date, '')
+ self.flag_out = 0
+
+
+ def end_h3(self): # End of folder
+ name, add_date, comment = self.saved_folder
+ self.saved_folder = (name + self.saved_data, add_date, comment)
+ self.saved_data = '' # Used
+
+
+ def start_dl(self, attrs):
+ self.flush()
+
+ if not self.outfile: # We are starting output after 1st <DL> tag to skip header
+ self.open_outfile()
+
+ self.level = self.level + 1
+
+
+ def end_dl(self):
+ self.flush()
+ self.level = self.level - 1
+
+
+ def do_dd(self, attrs):
+ if self.outfile:
+ self.flag_out = 2 # Set flag to signal "comment starting"
+
+
+ def do_br(self, attrs):
+ if self.outfile:
+ self.saved_data = self.saved_data + "<BR>" # Add <BR>...
+ self.flag_out = 0 # ...and next line of comment to saved comment
+
+
+ def do_hr(self, attrs):
+ if self.outfile:
+ self.flush()
+ self.saved_ruler = 1
+
+
+ def handle_charref(self, name):
+ if self.outfile:
+ self.flag_out = 0
+ self.saved_data = "%s&%c" % (self.saved_data, chr(name))
+
+
+ def handle_entityref(self, name):
+ if self.outfile:
+ self.flag_out = 0
+ if self.entitydefs.has_key(name): # If it is one of the standard SGML entities - close it with semicolon
+ x = ';'
+ else:
+ x = ''
+ self.saved_data = "%s&%s%s" % (self.saved_data, name, x)
+
+
+ def open_outfile(self):
+ self.outfile = open("bookmarks.tmp", 'w')
+
+
class Bookmarks2Text(BookmarksParser):
    """Parser that dumps each record as an indented line of plain text."""

    def _indent(self):
        # One space per nesting level below the outermost <DL>.
        return " " * (self.level - 1)

    def flush_anchor(self):
        self.outfile.write("%s%s\n" % (self._indent(), str(self.saved_anchor)))

    def flush_folder(self):
        self.outfile.write("%s%s\n" % (self._indent(), str(self.saved_folder)))

    def flush_ruler(self):
        self.outfile.write(self._indent() + "----------\n")

    def __del__(self):
        # Commit the temporary output under its final name.
        shutil.copy("bookmarks.tmp", "bookmarks.txt")
        os.unlink("bookmarks.tmp")
+
+
class Bookmarks2Flad(BookmarksParser):
    """Parser that writes records as a FLAD (flat ASCII database) file."""

    def __init__(self, formatter, verbose=0):
        BookmarksParser.__init__(self, formatter, verbose)
        self.flush_record = 0 # set once the 1st record is out; records are blank-line separated

    def flush(self):
        """Write a blank-line separator before every record but the first,
        then let the base class emit the pending record.

        (Dead code removed: the original also initialized a local
        `record_flushed` that was never read in this override.)
        """
        if not self.outfile:
            return

        if self.saved_anchor or self.saved_folder or self.saved_ruler or self.saved_data:
            if self.flush_record:
                self.outfile.write('\n')
            else:
                self.flush_record = 1

        BookmarksParser.flush(self)

    def flush_anchor(self):
        name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
        self.outfile.write("""Level: %d
Title: %s
URL: %s
AddDate: %s
LastVisit: %s
LastModified: %s
Comment: %s
""" % (self.level, name, href, add_date, last_visit, last_modified, comment))

    def flush_folder(self):
        name, add_date, comment = self.saved_folder
        self.outfile.write("""Level: %d
Folder: %s
AddDate: %s
Comment: %s
""" % (self.level, name, add_date, comment))

    def flush_ruler(self):
        self.outfile.write("Level: %s\nRuler: YES\n" % self.level)

    def __del__(self):
        # Commit the temporary output under its final name.
        shutil.copy("bookmarks.tmp", "bookmarks.db")
        os.unlink("bookmarks.tmp")
+
+
class Bookmarks2Gadfly(BookmarksParser):
    """Parser that inserts records into a GadFly SQL database.

    self.outfile doubles as the gadfly cursor, so the inherited
    "has output started" checks keep working unchanged.
    """

    def open_outfile(self):
        # Create the database and its single table; keep the connection
        # for the final commit in __del__.
        import gadfly
        connection = gadfly.gadfly()
        connection.startup("bookmarks", ".")
        self.connection = connection

        cursor = connection.cursor()
        cursor.execute("""create table bookmarks (
            rec_no integer,
            level integer,
            title varchar,
            DATA varchar,
            add_date integer,
            last_visit integer,
            last_modified integer,
            comment varchar
        )""")
        self.outfile = cursor

        # Parameterized insert reused by all flush_* methods below.
        self.template = """insert into bookmarks
            (rec_no, level, title, DATA, add_date, last_visit, last_modified, comment)
            values (?, ?, ?, ?, ?, ?, ?, ?)"""

    def __del__(self):
        # Commit everything when the parser goes away.
        self.connection.commit()

    def flush_anchor(self):
        name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
        self.outfile.execute(self.template,
            (self.record_no, self.level, name, href,
             add_date, last_visit, last_modified, comment)
        )

    def flush_folder(self):
        # Folders carry the literal marker "Folder" in the DATA column.
        name, add_date, comment = self.saved_folder
        self.outfile.execute(self.template,
            (self.record_no, self.level, name, "Folder",
             add_date, '', '', comment)
        )

    def flush_ruler(self):
        # Rulers carry the literal marker "Ruler" and empty fields.
        self.outfile.execute(self.template,
            (self.record_no, self.level, '', "Ruler",
             '', '', '', '')
        )
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Test FLAD database for: duplicate URLs, too big indent, incorrect record
+ format, spare keys.
+
+ Written by BroytMann, Jun 1997 - Feb 2000. Copyright (C) 1997-2000 PhiloSoft Design
+"""
+
+import sys, string
+from getopt import getopt
+from copy import _copy_dict
+
+import fladm
+
+
def error(err_str):
    """Report one validation error to stderr (and logfile, when open).

    Increments the module-global errors_found counter; on the very first
    error (with stats enabled) finishes run()'s "Testing: " line with
    the words "errors found".
    """
    global errors_found, report_stats
    if errors_found == 0:
        if report_stats:
            print "errors found"

    errors_found = errors_found + 1
    sys.stderr.write("%s\n" % err_str)

    # NOTE(review): `logfile` is read from module scope; run() must have
    # initialized it (possibly to None) before the first call.
    if logfile:
        logfile.write("%s\n" % err_str)
+
+
def check_key(record_no, record, key, allow_empty=1):
    """Verify that `record` carries `key` (optionally non-empty) and
    consume the key so check_empty() can detect leftovers later."""
    if record.has_key(key):
        if not allow_empty and not record[key]:
            error("Empty key `%s' in record %d -- %s" % (key, record_no, str(record)))
        del record[key]
    else:
        error("Expected `%s' in record %d -- %s" % (key, record_no, str(record)))
+
def check_date(record_no, record, key):
    """Verify that record[key] exists and is a decimal timestamp, then
    consume the key.

    Bug fix: the original fell through to `del record[key]` even when
    the key was missing, raising KeyError right after reporting the
    error; now it returns early like check_key() does.
    """
    if not record.has_key(key):
        error("Expected `%s' in record %d -- %s" % (key, record_no, str(record)))
        return

    try:
        _date = string.atoi(record[key])
    except string.atoi_error:
        error("Bad `%s' format in record %d -- %s" % (key, record_no, str(record)))

    del record[key]
+
def check_empty(record_no, record):
    """After every expected key was consumed, nothing must remain."""
    if len(record) > 0:
        error("Spare keys in record %d -- %s" % (record_no, str(record)))
+
def check_url(record_no, record):
    """Validate an URL record: consume all expected keys, flag spares."""
    # I am not testing here check_url("Level") because it is impossible
    # to come here without "Level" key - fladm.check_record has to reject
    # entire database if there is record without this "must key".
    # If someone adds record without "Level" manually - it is serious error
    # and the following line raise exception.
    del record["Level"]

    check_key(record_no, record, "Title")
    check_key(record_no, record, "URL")
    check_key(record_no, record, "Comment")

    check_date(record_no, record, "AddDate")
    check_date(record_no, record, "LastVisit")
    check_date(record_no, record, "LastModified")

    check_empty(record_no, record)
+
def check_folder(record_no, record):
    """Validate a folder record: consume expected keys, flag spares."""
    # Read comment above - in the beginning of check_url()
    del record["Level"]

    check_key(record_no, record, "Folder")
    check_key(record_no, record, "Comment")

    check_date(record_no, record, "AddDate")
    check_empty(record_no, record)
+
def check_ruler(record_no, record):
    """Validate a ruler (separator) record."""
    # Read comment above - in the beginning of check_url()
    del record["Level"]

    if not record.has_key("Ruler"):
        error("No `Ruler' in record %d -- %s" % (record_no, str(record)))
    else:
        if record["Ruler"] <> "YES": # Impossible: ruler saying it is not ruler
            error("Ruler saying it is not ruler in record %d -- %s" % (record_no, str(record)))
        del record["Ruler"]

    check_empty(record_no, record)
+
+
def run():
    """Check bookmarks.db for duplicate URLs, bad indentation and
    malformed records.

    Options: -l FILE  also write errors to FILE
             -s       suppress statistics output
    """
    optlist, args = getopt(sys.argv[1:], "l:s")

    global errors_found, report_stats, logfile
    report_stats = 1

    logfile = None
    logfname = None

    for _opt, _arg in optlist:
        if _opt == '-l':
            logfname = _arg
        if _opt == '-s':
            report_stats = 0
    try:
        del _opt, _arg
    except NameError:
        pass

    if len(args) > 1:
        sys.stderr.write("check_db: too many arguments\n")
        sys.exit(1)

    if logfname:
        logfile = open(logfname, 'w')

    if report_stats:
        sys.stdout.write("Loading: ")
        sys.stdout.flush()

    bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])

    if report_stats:
        print "Ok"
        sys.stdout.write("Testing: ")
        sys.stdout.flush()

    record_no = 0
    save_level = 1
    got_folder = 1 # Start as if we already have one folder
    errors_found = 0

    URL_d = {} # Create hash table full of URLs

    for record in bookmarks_db:
        record_no = record_no + 1
        level = string.atoi(record["Level"])

        if record.has_key("URL"):
            if URL_d.has_key(record["URL"]):
                error("Duplicate URL (rec. %d, 1st at rec. %d): %s" % (record_no, URL_d[record["URL"]], str(record["URL"])))
            else:
                URL_d[record["URL"]] = record_no

            # NOTE(review): _copy_dict is a private helper of the `copy`
            # module; record.copy() would be the supported spelling -
            # verify the record type before changing.
            check_url(record_no, _copy_dict(record))

        elif record.has_key("Folder"):
            check_folder(record_no, _copy_dict(record))

        elif record.has_key("Ruler"):
            check_ruler(record_no, _copy_dict(record))

        else:
            raise KeyError, "neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record)

        # Indentation may only grow by 1, and only right after a folder.
        if got_folder:
            if (level > save_level + 1):
                error("Indent %d too big (want %d at rec. %d), record: %s" % (level, save_level, record_no, str(record)))
        else:
            if (level > save_level):
                error("Indent %d without folder (rec. %d), record: %s" % (level, record_no, str(record)))

        save_level = level
        got_folder = record.has_key("Folder") # Test here to save got_folder for next loop

    # End of loop

    if logfname:
        logfile.close()

    if report_stats:
        print record_no, "records tested"
        if errors_found == 0:
            print "Ok (no errors found)"
        else:
            print "%d errors found" % errors_found


if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Test FLAD database for old records
+
+ Written by BroytMann, Feb 2000. Copyright (C) 2000 PhiloSoft Design
+"""
+
+
+import fladm
+from time import time
+
now = time()          # reference point: moment the script was loaded
thrashold = 2*24*3600 # 2 days  (sic - "threshold")


def run():
    """Print URLs whose AddDate lies within the last two days."""
    bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])

    for record in bookmarks_db:
        if record.has_key("URL"):
            add_date = int(record["AddDate"])
            if now - add_date < thrashold:
                print "New URL:", record["URL"]


if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Test FLAD database for old records
+
+ Written by BroytMann, Feb 2000. Copyright (C) 2000 PhiloSoft Design
+"""
+
+
+import fladm
+from time import time
+
now = time()             # reference point: moment the script was loaded
thrashold = 2*30*24*3600 # 2 months  (sic - "threshold")


def run():
    """Print URLs whose LastVisit is more than two months ago."""
    bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])

    for record in bookmarks_db:
        if record.has_key("URL"):
            last_visit = int(record["LastVisit"])
            if now - last_visit > thrashold:
                print "Old URL:", record["URL"]


if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Check URL - subprocess
+
+ Written by BroytMann, Mar 1999 - Feb 2000. Copyright (C) 1999-2000 PhiloSoft Design
+"""
+
+
+import sys, os, stat, string, time
+import urllib, www_util
+
+import cPickle
+pickle = cPickle
+from subproc import RecordFile
+
+from md5wrapper import md5wrapper
+
+
ftpcache_key = None # cache key of the ftp connection opened last; see get_welcome()
def myftpwrapper(user, passwd, host, port, dirs):
    # Wrapper around urllib.ftpwrapper that remembers the cache key of
    # the connection being created, so get_welcome() can find it later.
    global ftpcache_key
    ftpcache_key = (user, host, port, string.joinfields(dirs, '/'))
    return _ftpwrapper(user, passwd, host, port, dirs)

_ftpwrapper = urllib.ftpwrapper # keep the original before monkey-patching
urllib.ftpwrapper = myftpwrapper
+
def get_welcome():
    """Return the welcome message of the ftp connection opened last."""
    global ftpcache_key
    welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome
    # Forget the key. I am assuming there are no duplicate ftp URLs in
    # db; with duplicates the key read above is already invalid.
    ftpcache_key = None
    return welcome
+
+
class RedirectException(Exception):
    """Raised when the server answers with an HTTP redirect status."""

    # HTTP status -> short relocation tag used in the message.
    reloc_dict = {301: "perm", 302: "temp"}

    def __init__(self, errcode, newurl):
        tag = self.reloc_dict[errcode]
        Exception.__init__(self, "(%s.) to %s" % (tag, newurl))
+
+
class MyURLopener(urllib.URLopener):
    """URLopener that raises on redirects and on authentication demands
    instead of following/asking silently."""

    # Error 302 -- relocated (temporarily)
    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            newurl = "Nowhere"
        raise RedirectException(errcode, newurl)

    # Error 301 -- also relocated (permanently)
    http_error_301 = http_error_302

    # Error 401 -- authentication required
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        raise IOError, ('http error', errcode, "Authentication required ", headers)
+
+
def get_error(msg):
    """Format an IOError argument as a one-line string.

    Strings pass through unchanged; tuples/exceptions come back as a
    parenthesized list of quoted parts with newlines escaped.
    """
    if type(msg) == type(""):
        return msg

    quoted = []
    for part in msg:
        escaped = string.join(string.split(str(part), "\n"), "\\n")
        quoted.append("'%s'" % escaped)
    return "(%s)" % string.join(quoted)
+
def check_url(record):
    """Download record["URL"] into record["TEMPFILE"] and fill in
    Size/LastModified/MD5 - or Error/NoError/Moved on failure.
    Always stamps LastTested, even after an error."""
    try:
        now = str(int(time.time()))
        url_type, url_rest = urllib.splittype(record["URL"])
        url_host, url_path = urllib.splithost(url_rest)
        url_path, url_tag = urllib.splittag(url_path)

        # The parent passes the scratch-file name inside the record.
        tempfname = record["TEMPFILE"]
        del record["TEMPFILE"]

        fname, headers = urllib.urlretrieve("%s://%s%s" % (url_type, url_host, url_path), tempfname)

        last_modified = None
        record["Size"] = str(os.stat(tempfname)[stat.ST_SIZE])

        if headers:
            try:
                last_modified = headers["Last-Modified"]
            except KeyError:
                last_modified = None

            if last_modified:
                last_modified = www_util.parse_time(last_modified)

        if last_modified:
            last_modified = str(int(last_modified))
        else:
            # No usable header - fall back to the stored LastVisit.
            last_modified = record["LastVisit"]

        record["LastModified"] = last_modified

        md5 = md5wrapper()
        if url_type == "ftp": # Pass welcome message through MD5
            md5.update(get_welcome())

        md5.md5file(tempfname)
        record["MD5"] = str(md5)

    except IOError, msg:
        if (msg[0] == "http error") and (msg[1] == -1):
            record["NoError"] = "The server did not return any header - it is not an error, actually"
        else:
            record["Error"] = get_error(msg)

    except EOFError:
        record["Error"] = "Unexpected EOF (FTP server closed connection)"

    except RedirectException, msg:
        record["Moved"] = str(msg)

    # Mark this even in case of error
    record["LastTested"] = now
+
+
def run():
    """Subprocess main loop: read pickled records from stdin, check each
    URL, write the updated pickled record back to stdout."""
    urllib._urlopener = MyURLopener()

    # Some sites allow only Mozilla-compatible browsers; way to stop robots?
    server_version = "Mozilla/3.0 (compatible; Python-urllib/%s)" % urllib.__version__
    urllib._urlopener.addheaders[0] = ('User-agent', server_version)

    rec_in = RecordFile(sys.stdin)
    rec_out = RecordFile(sys.stdout)

    # NOTE(review): no explicit exit condition - presumably the loop ends
    # when the parent closes the pipe and read_record() raises; confirm
    # against subproc.RecordFile.
    while 1:
        record = pickle.loads(rec_in.read_record())
        check_url(record)
        rec_out.write_record(pickle.dumps(record))


if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ For every URL in the FLAD database get info from the Net
+ and store info in check.db
+
+ Version 2.0
+ Written by BroytMann, Aug 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design
+"""
+
+
+import sys, os, stat, string, time
+from getopt import getopt
+
+import urllib, tempfile
+from copy import _copy_dict
+
+import cPickle
+pickle = cPickle
+
+import fladm, fladc, shutil
+from flog import makelog, openlog
+
+
+os.environ["PATH"] = ".:" + os.environ["PATH"]
+from subproc import Subprocess, RecordFile
+
+
def set_checkpoint(rec_no):
    """Write check.dat: progress record number plus the size/mtime
    fingerprint of the database (module-global db_stat)."""
    cpfile = open("check.dat", 'w')
    cpfile.write("# chk_urls checkpoint file\n"
                 "Size: %d\n"
                 "MTime: %d\n"
                 "Record: %d" % (db_stat[stat.ST_SIZE],
                                 db_stat[stat.ST_MTIME],
                                 rec_no))
    cpfile.close()
+
+def get_checkpoint():
+ try:
+ cpfile = fladc.load_file("check.dat")
+ if (string.atoi(cpfile["Size"]) <> db_stat[stat.ST_SIZE]) or \
+ (string.atoi(cpfile["MTime"]) <> db_stat[stat.ST_MTIME]):
+ return -3
+
+ return string.atoi(cpfile["Record"])
+
+ except IOError: # No such file
+ return -1
+
+ except KeyError: # No such key in checkpoint file
+ return -2
+
+ except string.atoi_error: # Wrong numeric format
+ return -2
+
+ return 0
+
def start(db_name, report_stats):
    """Decide where to (re)start checking from, based on the checkpoint.

    Returns (start_recno, db_name, log); db_name switches to "check.db"
    when a valid checkpoint lets us continue a previous run.
    """
    start_recno = get_checkpoint()
    if start_recno < 0:
        if start_recno == -1: # no checkpoint file - fresh start
            log = makelog("check.log")
            log("chk_urls started")
            if report_stats:
                print " chk_urls: normal start"

        elif start_recno == -2: # malformed checkpoint file
            log = openlog("check.log")
            log("chk_urls started")
            log(" invalid checkpoint file, checkpoint ignored")
            if report_stats:
                print " chk_urls: invalid checkpoint file, checkpoint ignored"

        elif start_recno == -3: # bookmarks.db changed under us
            log = makelog("check.log")
            log("chk_urls started")
            log(" bookmarks.db changed, checkpoint ignored")
            if report_stats:
                print " chk_urls: bookmarks.db changed, checkpoint ignored"

        else:
            raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)

        start_recno = 0

    elif start_recno == 0: # get_checkpoint() never returns 0
        raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)

    else: # start_recno > 0
        if os.path.exists("check.db"):
            # Preserve the previous results before continuing over them.
            if not os.path.exists("check.old"):
                shutil.copy("check.db", "check.old")
            db_name = "check.db"

            log = openlog("check.log")
            log("chk_urls started")
            log(" found valid checkpoint file, continue")
            if report_stats:
                print " chk_urls: found valid checkpoint file, continue"

        else:
            log = makelog("check.log")
            log("chk_urls started")
            log(" valid checkpoint, but no check.db file, restarting")
            if report_stats:
                print " chk_urls: valid checkpoint, but no check.db file, restarting"
            start_recno = 0

    return start_recno, db_name, log
+
+
tempfname = "check_urls" + tempfile.gettempprefix() + ".tmp" # scratch file for downloads


check_subp = None # Subprocess running check_url_sub.py
subp_pipe = None  # RecordFile wrapped around its pipe

def restart_subp(log, report_stats):
    """(Re)start the URL-checking subprocess and its record pipe."""
    global check_subp, subp_pipe
    if check_subp:
        log(" restarting hanging subprocess")
        if report_stats:
            print " chk_urls: restarting hanging subprocess"
        # Drop the old objects first so their destructors run now.
        del check_subp
        del subp_pipe

    check_subp = Subprocess("check_url_sub.py")
    subp_pipe = RecordFile(check_subp)
+
+
def check_url(record, log, report_stats):
    """Ship `record` to the subprocess and merge the reply back into it.

    Returns 0 when interrupted by the user (^C), 1 otherwise.
    """
    try:
        record["TEMPFILE"] = tempfname
        subp_pipe.write_record(pickle.dumps(record))

        if check_subp.waitForPendingChar(900): # wait 15 minutes
            rec = pickle.loads(subp_pipe.read_record())
            del record["TEMPFILE"]
            # Copy every field the subprocess set (Size, MD5, Error, ...).
            for key in rec.keys():
                record[key] = rec[key]
        else:
            # Subprocess hung - replace it and mark the record.
            restart_subp(log, report_stats)
            del record["TEMPFILE"]
            record["Error"] = "Subprocess connection timed out"

    except KeyboardInterrupt:
        return 0

    return 1
+
+
def run():
    """Check every URL in the database through the subprocess, with
    checkpoint/restart support.

    Options: -i  inhibit progress bar
             -s  suppress statistics
             -e  recheck only records that previously got an Error
    """
    optlist, args = getopt(sys.argv[1:], "ise")

    show_pbar = 1
    report_stats = 1
    only_errors = 0
    db_name = "bookmarks.db"

    for _opt, _arg in optlist:
        if _opt == '-i':
            show_pbar = 0
        if _opt == '-s':
            report_stats = 0
        if _opt == '-e':
            only_errors = 1
    try:
        del _opt, _arg
    except NameError:
        pass

    if report_stats:
        print "BroytMann check_urls, Copyright (C) 1997-1999 PhiloSoft Design"

    if args:
        if len(args) > 1:
            sys.stderr.write("chk_urls: too many arguments\n")
            sys.exit(1)
        else:
            db_name = args[0]

    if show_pbar:
        show_pbar = sys.stderr.isatty()

    if show_pbar:
        try:
            from tty_pbar import ttyProgressBar
        except ImportError:
            show_pbar = 0

    global db_stat, log
    db_stat = os.stat(db_name)

    if only_errors:
        start_recno = 0
        db_name = "check.db"
        log = openlog("check.log")
        log("chk_urls restarted for errors")
    else:
        start_recno, db_name, log = start(db_name, report_stats)

    if report_stats:
        sys.stdout.write("Loading %s: " % db_name)
        sys.stdout.flush()

    bookmarks_db = fladm.load_from_file(db_name, fladm.check_record, ["Level"])
    # Keep a reference to the FULL database: store_to_file() must write
    # everything even when the working list is filtered below.
    bookmarks_dbstore = bookmarks_db

    if only_errors:
        bookmarks_db = filter(lambda r: r.has_key("Error") and r["Error"][:5] <> "Moved", bookmarks_db)

    if report_stats:
        print "Ok"

    db_len = len(bookmarks_db)
    if db_len == 0:
        print "Database empty"
        sys.exit(0)

    if start_recno >= db_len:
        _s = "start_recno (%d) >= db_len (%d), restarting" % (start_recno, db_len)
        log(" " + _s)
        if report_stats:
            print " chk_urls: " + _s
        del _s
        start_recno = 0

    if report_stats:
        if only_errors:
            s = "Rechecking errors: "
        else:
            s = "Checking: "
        sys.stdout.write(s)
        sys.stdout.flush()

    if show_pbar:
        # Silence per-record reports while the bar owns the terminal.
        save_stats = report_stats
        report_stats = 0
        pbar = ttyProgressBar(0, db_len)

    urls_no = 0
    record_count = 0
    start_time = time.time()

    rcode = 1
    restart_subp(log, report_stats) # Not restart, just start afresh
    checked_dict = {} # Dictionary of checked URLs, mapped to records number

    for record_no in range(start_recno, db_len):
        if show_pbar:
            pbar.display(record_no+1)

        record = bookmarks_db[record_no]
        record_count = record_count + 1

        if only_errors:
            del record["Error"]

        if record.has_key("URL"):
            url = record["URL"]
            if checked_dict.has_key(url):
                # Duplicate URL: reuse the earlier result, but keep this
                # record's own Level and Comment.
                log("Already checked %s" % url)
                level = record["Level"]
                comment = record["Comment"]
                bookmarks_db[record_no] = _copy_dict(bookmarks_db[checked_dict[url]])
                bookmarks_db[record_no]["Level"] = level
                bookmarks_db[record_no]["Comment"] = comment
            else:
                log("Checking %s" % url)
                rcode = check_url(record, log, report_stats)
                if rcode:
                    current_time = time.time()
                    if current_time - start_time >= 300: # Save checkpoint and database every 5 min
                        bookmarks_dbstore.store_to_file("check.db")
                        set_checkpoint(record_no)
                        log.flush()
                        start_time = current_time
                    urls_no = urls_no + 1
                    checked_dict[url] = record_no
                else:
                    log("Interrupted by user (^C)")
                    break

    if show_pbar:
        del pbar
        report_stats = save_stats

    if report_stats:
        print "Ok"
        print record_count, "records checked"
        print urls_no, "URLs checked"

    bookmarks_dbstore.store_to_file("check.db")

    if rcode:
        log("chk_urls finished ok")
    log.close()

    urllib.urlcleanup()
    if os.path.exists(tempfname):
        os.unlink(tempfname)

    if rcode:
        # Finished cleanly - the checkpoint is no longer needed.
        if os.path.exists("check.dat"):
            os.unlink("check.dat")
    else:
        set_checkpoint(record_no)
        sys.exit(1)


if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ For every URL in the FLAD database get info from the Net
+ and store info in check.db
+
+ Written by BroytMann, Aug-Oct 1997. Copyright (C) 1997 PhiloSoft Design
+"""
+
+import sys, os, string, stat, shutil, time
+from getopt import getopt
+import tempfile
+
+import urllib
+from urllib import URLopener, splittype
+
+from md5wrapper import md5wrapper
+from flog import makelog, openlog
+import fladm, fladc, www_util
+
+
# Shortcut for basic usage
_urlopener = None # lazily created URLopener shared by the helpers below

def urlopen(url):
    """urllib.urlopen work-alike using the module-private opener."""
    global _urlopener
    if not _urlopener:
        _urlopener = URLopener()
    return _urlopener.open(url)

def urlretrieve(url, filename=None):
    """urllib.urlretrieve work-alike using the module-private opener."""
    global _urlopener
    if not _urlopener:
        _urlopener = URLopener()
    if filename:
        return _urlopener.retrieve(url, filename)
    else:
        return _urlopener.retrieve(url)

def urlcleanup():
    """Drop the opener's cache (temporary files etc.), if any exists."""
    if _urlopener:
        _urlopener.cleanup()
+
+
_key = None # cache key of the ftp connection opened last; see get_welcome()

def myftpwrapper(user, passwd, host, port, dirs):
    # Wrapper around urllib.ftpwrapper that remembers the cache key of
    # the connection being created, so get_welcome() can find it later.
    global _key
    _key = (user, host, port, string.joinfields(dirs, '/'))
    return _ftpwrapper(user, passwd, host, port, dirs)

_ftpwrapper = urllib.ftpwrapper # keep the original before monkey-patching
urllib.ftpwrapper = myftpwrapper

def get_welcome():
    """Return the welcome message of the ftp connection opened last."""
    global _key
    _welcome = _urlopener.ftpcache[_key].ftp.welcome
    _key = None # I am assuming there are no duplicate ftp URLs in db. If there are - _key in prev line is invalid
    return _welcome
+
+
def set_checkpoint(rec_no):
    """Write check.dat: progress record number plus the size/mtime
    fingerprint of the database (module-global db_stat)."""
    cpfile = open("check.dat", 'w')
    cpfile.write("# chk_urls checkpoint file\n"
                 "Size: %d\n"
                 "MTime: %d\n"
                 "Record: %d" % (db_stat[stat.ST_SIZE],
                                 db_stat[stat.ST_MTIME],
                                 rec_no))
    cpfile.close()
+
+def get_checkpoint():
+ try:
+ cpfile = fladc.load_file("check.dat")
+ if (string.atoi(cpfile["Size"]) <> db_stat[stat.ST_SIZE]) or \
+ (string.atoi(cpfile["MTime"]) <> db_stat[stat.ST_MTIME]):
+ return -3
+
+ return string.atoi(cpfile["Record"])
+
+ except IOError: # No such file
+ return -1
+
+ except KeyError: # No such key in checkpoint file
+ return -2
+
+ except string.atoi_error: # Wrong numeric format
+ return -2
+
+ return 0
+
+
+tempfname = tempfile.gettempprefix() + "check.tmp"
+
+
def get_error(msg):
    """Format an IOError argument as a one-line string.

    Strings pass through unchanged; tuples/exceptions come back as a
    parenthesized, comma-separated list of quoted parts with newlines
    escaped.
    """
    if type(msg) == type(""):
        return msg

    parts = []
    for item in msg:
        escaped = string.join(string.split(str(item), "\n"), "\\n")
        parts.append("'%s'" % escaped)
    return "(" + string.join(parts, ", ") + ")"
+
def check_url(record, url_type, url_rest):
    """Download the URL and fill in Size/LastModified/MD5, or Error on
    failure; always stamps LastTested. Returns 0 on ^C, 1 otherwise."""

    now = str(int(time.time()))

    try:
        fname, headers = urlretrieve(url_type + ':' + url_rest, tempfname)

        last_modified = None

        record["Size"] = str(os.stat(tempfname)[stat.ST_SIZE])

        if headers:
            try:
                last_modified = headers["Last-Modified"]
            except KeyError:
                last_modified = None

            if last_modified:
                last_modified = www_util.parse_time(last_modified)

        if last_modified:
            last_modified = str(int(last_modified))
        else:
            # No usable header - fall back to the stored LastVisit.
            last_modified = record["LastVisit"]

        record["LastModified"] = last_modified

        md5 = md5wrapper()
        if url_type == "ftp": # Pass welcome message through MD5
            md5.update(get_welcome())

        md5.md5file(tempfname)
        record["MD5"] = str(md5)

    except IOError, msg:
        record["Error"] = get_error(msg)

    except EOFError:
        record["Error"] = "Unexpected EOF (FTP server closed connection)"

    except KeyboardInterrupt:
        return 0

    # Mark this even in case of error
    record["LastTested"] = now

    return 1
+
+
def run():
    """Check every URL in bookmarks.db and store the results in check.db.

    Command line options:
        -i  inhibit the progress bar
        -s  suppress statistics / informational messages

    Supports checkpoint/restart: get_checkpoint() reports whether a valid
    checkpoint exists; the database and checkpoint are saved every 5
    minutes so an interrupted run (^C) can be continued later.  Exits
    with status 1 when interrupted by the user.

    NOTE(review): indentation in this listing was mangled; block structure
    has been reconstructed and should be confirmed against the original.
    """
    optlist, args = getopt(sys.argv[1:], "is")

    show_pbar = 1
    report_stats = 1
    db_name = "bookmarks.db"

    for _opt, _arg in optlist:
        if _opt == '-i':
            show_pbar = 0
        if _opt == '-s':
            report_stats = 0
    # Remove the loop variables; NameError means optlist was empty and
    # they were never bound.
    try:
        del _opt, _arg
    except NameError:
        pass

    if report_stats:
        print "BroytMann chk_urls, Copyright (C) 1997-1998 PhiloSoft Design"

    # chk_urls takes no positional arguments at all.
    if args:
        sys.stderr.write("chk_urls: too many arguments\n")
        sys.exit(1)

    # Show the bar only on an interactive terminal...
    if show_pbar:
        show_pbar = sys.stderr.isatty()

    # ...and only if the optional tty_pbar module is available.
    if show_pbar:
        try:
            from tty_pbar import ttyProgressBar
        except ImportError:
            show_pbar = 0

    global db_stat, log
    # Used (presumably by the checkpoint code) to detect that
    # bookmarks.db changed since the checkpoint was written.
    db_stat = os.stat("bookmarks.db")

    # Negative return values encode "no usable checkpoint" reasons;
    # positive means "continue from that record number".
    start_recno = get_checkpoint()
    if start_recno < 0:
        if start_recno == -1:
            # Normal start: no checkpoint file at all.
            log = makelog("check.log")
            log("chk_urls started")
            if report_stats:
                print " chk_urls: normal start"

        elif start_recno == -2:
            # Checkpoint file exists but is unparsable.
            log = openlog("check.log")
            log("chk_urls started")
            log(" invalid checkpoint file, checkpoint ignored")
            if report_stats:
                print " chk_urls: invalid checkpoint file, checkpoint ignored"

        elif start_recno == -3:
            # Database changed since the checkpoint was taken.
            log = makelog("check.log")
            log("chk_urls started")
            log(" bookmarks.db changed, checkpoint ignored")
            if report_stats:
                print " chk_urls: bookmarks.db changed, checkpoint ignored"

        else:
            raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)

        start_recno = 0

    elif start_recno == 0:
        # 0 is not a legal checkpoint value (records are counted from 0,
        # but a checkpoint at record 0 is meaningless).
        raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)

    else: # start_recno > 0
        if os.path.exists("check.db"):
            # Preserve the previous results once, then continue the
            # interrupted run against check.db itself.
            if not os.path.exists("check.old"):
                shutil.copy("check.db", "check.old")
            db_name = "check.db"

            log = openlog("check.log")
            log("chk_urls started")
            log(" found valid checkpoint file, continue")
            if report_stats:
                print " chk_urls: found valid checkpoint file, continue"

        else:
            # Checkpoint without its database: cannot continue, restart.
            log = makelog("check.log")
            log("chk_urls started")
            log(" valid checkpoint, but no check.db file, restarting")
            if report_stats:
                print " chk_urls: valid checkpoint, but no check.db file, restarting"
            start_recno = 0

    if report_stats:
        sys.stdout.write("Loading %s: " % db_name)
        sys.stdout.flush()

    bookmarks_db = fladm.load_from_file(db_name, fladm.check_record, ["Level"])
    db_len = len(bookmarks_db)

    if report_stats:
        print "Ok"

    # A checkpoint beyond the end of the database is stale - restart.
    if start_recno >= db_len:
        _s = "start_recno (%d) >= db_len (%d), restarting" % (start_recno, db_len)
        log(" " + _s)
        if report_stats:
            print " chk_urls: " + _s
        del _s
        start_recno = 0

    if report_stats:
        sys.stdout.write("Checking: ")
        sys.stdout.flush()

    if show_pbar:
        pbar = ttyProgressBar(0, db_len)

    urls_no = 0
    record_count = 0
    start_time = time.time()

    # rcode stays 1 while check_url() succeeds; 0 means user interrupt.
    rcode = 1
    for record_no in range(start_recno, db_len):
        if show_pbar:
            pbar.display(record_no+1)

        record = bookmarks_db[record_no]
        record_count = record_count + 1

        # Only URL records are checked; folders/rulers are just counted.
        if record.has_key("URL"):
            url_type, url_rest = splittype(record["URL"])
            log("Checking %s:%s" % (url_type, url_rest))
            rcode = check_url(record, url_type, url_rest)
            if rcode:
                current_time = time.time()
                if current_time - start_time >= 300: # Save checkpoint and database every 5 min
                    bookmarks_db.store_to_file("check.db")
                    set_checkpoint(record_no)
                    log.flush()
                    start_time = current_time
                urls_no = urls_no + 1
            else:
                log("Interrupted by user (^C)")
                break

    if show_pbar:
        del pbar

    if report_stats:
        print "Ok"
        print record_count, "records checked"
        print urls_no, "URLs checked"

    # Always persist whatever was checked, even after an interrupt.
    bookmarks_db.store_to_file("check.db")

    if rcode:
        log("chk_urls finished ok")
        # NOTE(review): in the mangled original it is unclear whether
        # log.close() was conditional; as reconstructed, the log stays
        # open on the interrupt path below - confirm against the original.
        log.close()

    # Drop urllib's cache and the temporary download file, if any.
    urlcleanup()
    if os.path.exists(tempfname):
        os.unlink(tempfname)

    if rcode:
        # Clean finish: the checkpoint file is no longer needed.
        if os.path.exists("check.dat"):
            os.unlink("check.dat")
    else:
        # Interrupted: remember where to restart and signal failure.
        set_checkpoint(record_no)
        sys.exit(1)
+
+
# Run the URL checker only when executed as a script, not on import.
if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Test FLAD database for old records
+
+ Written by BroytMann, Feb 2000. Copyright (C) 2000 PhiloSoft Design
+"""
+
+
+import fladm
+
+
def run():
    """Collect every bookmark record carrying an "Error" key into errors.db."""
    source_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])
    failed = fladm.Flad_WithMustKeys(fladm.check_record, ["Level"])

    # Keep only the records that the robot marked with an error.
    for entry in filter(lambda r: r.has_key("Error"), source_db):
        failed.append(entry)

    failed.store_to_file("errors.db")
+
+
# Script entry point: extract error records when run directly.
if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Convert FLAD database back to bookmarks.html suitable for Netscape Navigator
+
+ Written by BroytMann, Jun 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design
+"""
+
+import sys, os, string, shutil
+from getopt import getopt
+
+import fladm
+
+
def write(s):
    """Write s to the full (private) HTML file, and to the public file
    only while outside the private folder (private_level == 0).

    Uses module globals private_level, public_html, private_html set up
    by gen_html().
    """
    # Fix: the parameter was named `str`, shadowing the builtin; renamed
    # to `s` (every call in this file passes it positionally).
    if private_level == 0: # Put in public all except private folder
        public_html.write(s)
    private_html.write(s)
+
+
def unindent(old_level, new_level):
    """Emit closing </DL> tags from old_level down to new_level.

    Tags are written deepest-first, each indented one space per level.
    """
    for lev in range(old_level - 1, new_level - 1, -1):
        write(" " * lev + "</DL><p>\n")
+
+
def gen_html(bookmarks_db, show_pbar, report_stats):
    """Generate public.html and private.html from the FLAD bookmarks database.

    Both files start as copies of the "header" file and are appended to
    through write(), which hides the private folder from public.html.
    Record counters and file handles are kept in module globals so that
    write() and the caller can see them.

    Raises ValueError on inconsistent indentation levels and KeyError on
    a record that is neither URL, nor Folder, nor Ruler.
    """
    global pbar, record_no, urls_no, public_html, private_html, private_level

    shutil.copy("header", "public.html")
    shutil.copy("header", "private.html")

    # Append after the common header copied above.
    public_html = open("public.html", 'a')
    private_html = open("private.html", 'a')

    record_no = 0
    urls_no = 0

    save_level = 0
    got_folder = 1 # Start as if we already have one folder
    private_level = 0

    for record in bookmarks_db:
        record_no = record_no + 1

        if show_pbar:
            pbar.display(record_no)

        level = string.atoi(record["Level"])

        # The level may stay, go one step deeper (only right after a
        # folder), or come back up any number of steps.
        if level == save_level:
            pass
        elif level == save_level + 1:
            if got_folder:
                write(" "*(level - 1) + "<DL><p>\n")
            else:
                raise ValueError, "indent without folder"
        elif level <= save_level - 1:
            unindent(save_level, level)
        else:
            raise ValueError, "new level (%d) too big; must be %d - %d" % (level, save_level-1, save_level+1)

        save_level = level
        got_folder = record.has_key("Folder") # Test here to save got_folder for next loop

        if private_level == save_level:
            private_level = 0 # We've returned to saved private level - private folder is over

        if record.has_key("URL"):
            write(" "*level + '<DT><A HREF="%s" ADD_DATE="%s" LAST_VISIT="%s" LAST_MODIFIED="%s">%s</A>\n' % (record["URL"], record["AddDate"], record["LastVisit"], record["LastModified"], record["Title"]))
            urls_no = urls_no + 1

        elif record.has_key("Folder"):
            # Dirty hacks here: folder names are compared against fixed
            # strings specific to the author's bookmarks file.
            if (record["Folder"] == "Private links") and (private_level == 0):
                private_level = save_level # We found private folder - save its level

            if record["Folder"] == "All the rest - Unclassified":
                write(" "*level + '<DT><H3 NEWITEMHEADER ADD_DATE="%s">%s</H3>\n' % (record["AddDate"], record["Folder"]))
            else:
                write(" "*level + '<DT><H3 ADD_DATE="%s">%s</H3>\n' % (record["AddDate"], record["Folder"]))

        elif record.has_key("Ruler"):
            write(" "*level + "<HR>\n")

        else:
            raise KeyError, "neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record)

        # Comments were flattened to one line with <BR> separators when
        # the database was built; re-expand them here.
        if record.has_key("Comment") and (record["Comment"] <> ''):
            write("<DD>%s\n" % string.join(string.split(record["Comment"], "<BR>"), "<BR>\n"))


    # Close any lists still open at the end of the database.
    if save_level >= 0:
        unindent(save_level, 0)
    else:
        raise ValueError, "new level (%d) too little - must be >= 0" % save_level

    public_html.close()
    private_html.close()

    if show_pbar:
        del pbar

    if report_stats:
        print "Ok"
+
+
+def translate(bookmarks_db, transldb_name, transl, show_pbar, report_stats):
+ global pbar, record_no, urls_no, public_html, private_html, private_level
+
+ new_ext = str(transl)
+ os.rename("public.html", "public." + new_ext)
+ os.rename("private.html", "private." + new_ext)
+
+ transl_d = {}
+ transl_db = fladm.load_from_file(transldb_name, fladm.check_record, ["URL1", "URL2"], [""])
+ # This prevents any other key to appear in transl.db ^
+
+ # Generate translation dictionary (hash table)
+ if transl == 1:
+ for record in transl_db:
+ transl_d[record["URL1"]] = record["URL2"]
+ elif transl == 2:
+ for record in transl_db:
+ transl_d[record["URL2"]] = record["URL1"]
+ else:
+ raise ValueError, "transl (%d) must be 1 or 2" % transl
+
+ del transl_db # Save few bytes of memory
+ transl_k = transl_d.keys()
+
+ # Translate URLs
+ for record in bookmarks_db:
+ if record.has_key("URL") and (record["URL"] in transl_k):
+ record["URL"] = transl_d[record["URL"]]
+
+ gen_html(bookmarks_db, show_pbar, report_stats)
+
+ new_ext = str(3 - transl) # Translate 1 to 2, or 2 to 1
+ os.rename("public.html", "public." + new_ext)
+ os.rename("private.html", "private." + new_ext)
+
+
def run():
    """Convert bookmarks.db back to Netscape bookmarks HTML files.

    Command line options:
        -i          inhibit the progress bar
        -s          suppress statistics output
        -t dict.db  translate URLs through the given dictionary database
        -r          reverse the translation direction (URL2 -> URL1)
    """
    global pbar, record_no, urls_no, public_html, private_html, private_level

    optlist, args = getopt(sys.argv[1:], "ist:r")

    show_pbar = 1
    report_stats = 1

    transldb_name = "" # dictionary translation; default is no translation
    transl = 0

    for _opt, _arg in optlist:
        if _opt == '-i':
            show_pbar = 0
        if _opt == '-s':
            report_stats = 0
        if _opt == '-t':
            transldb_name = _arg
            transl = 1
        if _opt == '-r':
            transl = 2
    # Remove the loop variables; NameError means optlist was empty.
    try:
        del _opt, _arg
    except NameError:
        pass

    # db2bkmk takes no positional arguments.
    if args:
        sys.stderr.write("db2bkmk: too many arguments\n")
        sys.exit(1)

    # Progress bar only on an interactive terminal and only when the
    # optional tty_pbar module is importable.
    if show_pbar:
        show_pbar = sys.stderr.isatty()

    if show_pbar:
        try:
            from tty_pbar import ttyProgressBar
        except ImportError:
            show_pbar = 0

    if report_stats:
        sys.stdout.write("Loading: ")
        sys.stdout.flush()

    bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])

    if report_stats:
        print "Ok"
        sys.stdout.write("Converting FLAD database to bookmarks.html: ")
        sys.stdout.flush()

    if show_pbar:
        pbar = ttyProgressBar(0, len(bookmarks_db))

    gen_html(bookmarks_db, show_pbar, report_stats)

    if transl:
        if report_stats:
            sys.stdout.write("Translating: ")
            sys.stdout.flush()

        # Display bar only without "-i"; with "-s" skip it (one bar
        # already displayed, and it is enough).
        if report_stats and show_pbar:
            pbar = ttyProgressBar(0, len(bookmarks_db))

        else:
            show_pbar = 0

        translate(bookmarks_db, transldb_name, transl, show_pbar, report_stats)


    if report_stats:
        print record_no, "records proceed"
        print urls_no, "urls created"
+
+
# Script entry point: rebuild the HTML files when run directly.
if __name__ == '__main__':
    run()
--- /dev/null
+URL1: http://www.xland.ru:8088/tel_koi/owa/tel.intro
+URL2: http://www.xland.ru:8088/tel_win/owa/tel.intro
+
+URL1: http://meteo.infospace.ru/koi/moscow/html/r_index.htm
+URL2: http://meteo.infospace.ru/win/moscow/html/r_index.htm
+
+URL1: http://meteo.infospace.ru/koi/wcond/html/r_index.ssi
+URL2: http://meteo.infospace.ru/win/wcond/html/r_index.ssi
+
+URL1: http://koi.dzik.aha.ru/
+URL2: http://www.dzik.aha.ru/
+
+URL1: http://www-psb.ad-sbras.nsc.ru/kruglk.htm
+URL2: http://www-psb.ad-sbras.nsc.ru/kruglw.htm
--- /dev/null
+
+ BOOKMARKS database and internet robot
+
+ Here is a set of classes, libraries and programs I use to manipulate my
+bookmarks.html. I like Netscape Navigator, but I need more features, so I am
+writing these programs for my needs. I need to extend Navigator's "What's new"
+feature (Navigator 4 named it "Update bookmarks").
+
+ These programs are intended to run as follows.
+1. bkmk2db converts bookmarks.html to bookmarks.db.
+2. chk_urls (Internet robot) runs against bookmarks.db, checks every URL and
+ saves results in check.db.
+3. db2bkmk converts bookmarks.db back to bookmarks.html.
+ Then I use this bookmarks file and...
+4. bkmk2db converts bookmarks.html to bookmarks.db.
+5. chk_urls (Internet robot) runs against bookmarks.db, checks every URL and
+ saves results in check.db (old file copied to check.old).
+6. (A yet-unnamed program) will compare check.old with check.db and generate
+a detailed report. For example:
+ this URL is unchanged
+ this URL is changed
+ this URL is unavailable due to: host not found...
+
+ Bookmarks database programs are almost debugged. What needs to be done is
+support for aliases. The second version of the internet robot is finished.
+
+ Although not required, these programs work fine with tty_pbar.py (my little
+module for creating text-mode progress bars).
+
+COPYRIGHT and LEGAL ISSUES
+ All programs copyrighted by Oleg Broytmann and PhiloSoft Design. All
+sources protected by GNU GPL. Programs are provided "as-is", without any kind
+of warranty. All usual blah-blah-blah.
+
+ #include <disclaimer>
+
+
+------------------------------ bkmk2db ------------------------------
+ NAME
+ bkmk2db.py - script to convert bookmarks.html to FLAD database.
+
+ SYNOPSIS
+ bkmk2db.py [-its] [/path/to/bookmarks.html]
+
+ DESCRIPTION
+ bkmk2db.py splits given file (or ./bookmarks.html) into FLAD database
+ bookmarks.db in current directory.
+
+ Options:
+ -i
+ Inhibit progress bar. Default is to display progress bar if
+ stderr.isatty()
+
+ -t
+ Convert to text file (for debugging). Default is to convert to
+ FLAD.
+
+ -s
+ Suppress output of statistics at the end of the program. Default
+ is to write how many lines the program read and how many URLs
+ parsed. Also suppress some messages during run.
+
+ BUGS
+ The program starts working by writing lines to the header file until
+ BookmarksParser initializes its own output file (this occurs when the
+ parser encounters the first <DL> tag). It is a misdesign.
+
+ Empty comments (no text after <DD>) are not marked specially in
+ database, so db2bkmk.py will not reconstruct it. I don't need empty
+ <DD>s, so I consider it as feature, not a real bug.
+
+ Aliases are not supported (yet).
+
+
+------------------------------ db2bkmk ------------------------------
+ NAME
+ db2bkmk.py - script to reconstruct bookmarks.html back from FLAD
+ database.
+
+ SYNOPSIS
+ db2bkmk.py [-is] [-t dict.db [-r]]
+
+ DESCRIPTION
+ db2bkmk.py reads bookmarks.db and creates two HTML files -
+ public.html and private.html. The latter is just full
+ bookmarks.html, while the former file hides private folder.
+
+ Options:
+ -i
+ Inhibit progress bar. Default is to display progress bar if
+ stderr.isatty()
+
+ -s
+ Suppress output of statistics at the end of the program. Default is
+ to write how many records the program processed and how many URLs
+ were created. Also suppresses some messages during the run.
+
+ -t dict.db
+ For most tasks, if someone need to process bookmarks.db in a
+ regular way (for example, replace all "gopher://gopher." with
+ "http://www."), it is easy to write special program, processing
+ every DB record. For some tasks it is even simpler and faster to
+ write sed/awk scripts. But there are cases when someone need to
+ process bookmarks.db in a non-regular way: one URL must be changed
+ in one way, another URL - in second way, etc. The -t option allows
+ to use an external dictionary for such translation. The dictionary
+ itself is again a FLAD database, where every record has two keys -
+ URL1 and URL2. With the -t option in effect, db2bkmk generates
+ {private,public}.html, renames them to {private,public}.1, and
+ then translates the entire bookmarks.db again, generating
+ {private,public}.2 (totally 4 files), where every URL1 replaced
+ with URL2 from dictionary. (See koi2win.db for example of
+ translation dictionary)
+
+ -r
+ Reverse the effect of -t option - translate from URL2 to URL1.
+
+ BUGS
+ There are three hacks under line marked with "Dirty hacks here":
+ 1. if record["Folder"] == "Private links":
+ This is to hide passwords from my bookmarks file.
+
+ 2. if record["Folder"] == "All the rest - Unclassified":
+ outfile.write(" "*level + "<DT><H3 NEWITEMHEADER ...")
+ First, I compare folder name with fixed string. This is real string
+ from my bookmarks.html. If anyone wants to use the program (s)he
+ should change at least the very strings "Private links" and "All the
+ rest - Unclassified". Second, I use the netscapism "NEWITEMHEADER".
+ Yes, I wrote these programs for Navigator's bookmarks.html, but I
+ still would not like to use too many netscapisms here.
+
+
+------------------------------ check_db ------------------------------
+ NAME
+ check_db.py - script to test generated FLAD database.
+
+ SYNOPSIS
+ check_db.py [-s] [-l logfile.err]
+
+ DESCRIPTION
+ check_db.py reads bookmarks.db and tests for various conditions and
+ possible errors. Current tests are for duplicated URLs and too big
+ indent. "Indent without folder" or "Indent too big" may occur if
+ someone edit bookmarks.db manually, inserting a record with incorrect
+ (higher) level (lower levels indents are ok). Every record tested for
+ correct format (that there are no spare keys, date formats are
+ correct).
+
+ Options:
+ -l logfile.err
+ Put error log into log file (errors are printed to stderr
+ anyway).
+
+ -s
+ Suppress information messages while running (errors are printed
+ anyway).
+
+
+------------------------------ chk_urls -----------------------------
+ NAME
+ chk_urls.py - Internet robot
+
+ SYNOPSIS
+ chk_urls.py [-is]
+
+ DESCRIPTION
+ chk_urls.py runs against bookmarks.db, checking every URL and store
+ results in check.db. check.db is FLAD database almost identical to
+ bookmarks.db, with modified LastVisit/LastModified fields. An additional
+ field, Error, appears in records that could not be checked for some
+ reason; the reason is the content of the Error field.
+ After every 100 URLs chk_urls creates checkpoint file check.dat (in
+ set_checkpoint()). The file is FLAD suitable to pass to
+ fladc.load_file() (in get_checkpoint()). If interrupted by ^C, killed
+ or crashed, chk_urls can be restarted, and checkpoint file helps to
+ restart from interrupted state. Checkpoint stores size and mtime of
+ bookmarks.db (to note if the file changed while chk_urls interrupted)
+ and last checked record. If chk_urls cannot find checkpoint file, or
+ bookmarks.html changed, chk_urls will restart from the beginning. If
+ there is valid checkpoint and size/mtime are ok - chk_urls will start
+ from interrupted record.
+
+ Options:
+ -i
+ Inhibit progress bar. Default is to display progress bar if
+ stderr.isatty()
+
+ -s
+ Suppress output of statistics at the end of the program. Default is
+ to write how many records the program processed and how many URLs
+ were checked. Also suppresses some messages during the run.
+
+ BUGS
+ Ugly mechanism to catch welcome message from FTP server (from urllib).
+
+
+------------------------------ chk_urls -----------------------------
+ NAME
+ check_urls2.py - Internet robot
+
+ SYNOPSIS
+ check_urls2.py [-is]
+
+ DESCRIPTION
+ check_urls2 is just a second version of chk_urls.py. It forks off a child
+ process and the child checks URLs. The parent monitors the child and kills
+ it if there is no answer within 15 minutes.