--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Convert Netscape Navigator's bookmarks.html to FLAD database
+
+ Written by BroytMann, Jun 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design
+"""
+
+import sys, os, stat, string
+from getopt import getopt
+
+import bkmk_parser
+from formatter import AbstractFormatter, NullWriter
+
+
+def run():
+ optlist, args = getopt(sys.argv[1:], "gits")
+
+ show_pbar = 1
+ to_text = 0
+ to_gf = 0
+ report_stats = 1
+
+ for _opt, _arg in optlist:
+ if _opt == '-g':
+ to_gf = 1
+ if _opt == '-i':
+ show_pbar = 0
+ if _opt == '-t':
+ to_text = 1
+ if _opt == '-s':
+ report_stats = 0
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
+
+ if args:
+ if len(args) > 1:
+ sys.stderr.write("bkmk2db: too many arguments\n")
+ sys.exit(1)
+
+ filename = args[0]
+
+ else:
+ filename = 'bookmarks.html' # good name both for DOS (bookmark.htm) and UNIX
+
+ if show_pbar:
+ show_pbar = sys.stderr.isatty()
+
+ if show_pbar:
+ try:
+ from tty_pbar import ttyProgressBar
+ except ImportError:
+ show_pbar = 0
+
+ if show_pbar:
+ try:
+ size = os.stat(filename)[stat.ST_SIZE]
+ except:
+ print filename, ": no such file"
+ sys.exit(1)
+
+
+ fmt = AbstractFormatter(NullWriter())
+ if to_text:
+ parser = bkmk_parser.Bookmarks2Text(fmt)
+ elif to_gf:
+ parser = bkmk_parser.Bookmarks2Gadfly(fmt)
+ else:
+ parser = bkmk_parser.Bookmarks2Flad(fmt)
+
+
+ if report_stats:
+ str = "Converting " + filename + " to "
+ if to_text:
+ str = "text"
+ elif to_gf:
+ str = "GadFly database"
+ else:
+ str = "FLAD database"
+
+ sys.stdout.write("Converting %s to %s: " % (filename, str))
+ sys.stdout.flush()
+
+ if show_pbar:
+ pbar = ttyProgressBar(0, size)
+ lng = 0
+
+ # This is for DOS - it counts CRLF, which len() counts as 1 char!
+ if os.name == 'dos' or os.name == 'nt' :
+ dos_add = 1
+ else:
+ dos_add = 0 # UNIX' and Mac's len() counts CR or LF correct
+
+ try:
+ f = open(filename, 'r')
+ except IOError, msg:
+ print filename, ":", msg
+ sys.exit(1)
+
+ header = open("header", 'w')
+ line_no = 0
+
+ while 1:
+ line = f.readline()
+ if not line:
+ break
+
+ if show_pbar:
+ lng = lng + len(line) + dos_add
+ pbar.display(lng)
+
+ line = string.strip(line)
+ line_no = line_no + 1
+
+ try:
+ parser.feed(line)
+
+ if parser.outfile: # Write header until HTML parser start writing outfile
+ if header:
+ header.close()
+ header = None
+ else:
+ header.write(line + '\n')
+
+ except:
+ break # I need total number of lines; interpreter will print traceback on exit
+
+ if show_pbar:
+ del pbar
+
+ if report_stats:
+ print "Ok"
+ print line_no, "lines proceed"
+ print parser.urls_no, "urls found"
+ print parser.record_no, "records created"
+
+ parser.close()
+ f.close()
+
+
+if __name__ == '__main__':
+ run()
--- /dev/null
+"""
+ Bookmarks parsers
+
+ Written by BroytMann, Mar 1997 - Feb 2000. Copyright (C) 1997-2000 PhiloSoft Design
+"""
+
+
+import os, string, shutil
+from htmllib import HTMLParser
+
+
+class BookmarksParser(HTMLParser): # Parser for Navigator's bookmarks (abstract class)
+ def __init__(self, formatter, verbose=0):
+ HTMLParser.__init__(self, formatter, verbose)
+ self.urls_no = 0 # cross-reference counter
+ self.record_no = 1 # record counter
+ self.outfile = None # output file
+ self.level = 0 # Indentation level
+ self.flag_out = 0 # Is it time to flush?
+ self.saved_data = ''
+ self.saved_anchor = None
+ self.saved_folder = None
+ self.saved_ruler = None
+
+
+ def flush(self):
+ if not self.outfile:
+ return
+
+ record_flushed = 0
+
+ if self.saved_anchor:
+ name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
+ self.saved_anchor = (name, href, add_date, last_visit, last_modified, comment + self.saved_data)
+ self.flush_anchor()
+ self.saved_data = ''
+ record_flushed = 1
+ self.saved_anchor = None
+
+ if self.saved_folder:
+ name, add_date, comment = self.saved_folder
+ self.saved_folder = (name, add_date, comment + self.saved_data)
+ self.flush_folder()
+ self.saved_data = ''
+ record_flushed = 1
+ self.saved_folder = None
+
+ if self.saved_ruler:
+ self.flush_ruler()
+ record_flushed = 1
+ self.saved_ruler = None
+
+ if record_flushed:
+ self.record_no = self.record_no + 1
+
+ if self.saved_data <> '': # This may occur after ampersand
+ self.flag_out = 0
+
+
+
+
+ def close(self):
+ HTMLParser.close(self)
+
+ if self.outfile:
+ self.outfile.close()
+
+ if self.level <> 0:
+ print "Bad HTML: <DL> and </DL> mismatch; level=%d" % self.level
+
+
+ def handle_data(self, data):
+ if not self.outfile:
+ return
+
+ if data and (data[0] == '&'): # Ampersand parsed by SGMLlib
+ self.flag_out = 0
+
+ if self.flag_out == 2: # Process comment after <DD> or <HR>
+ if self.saved_anchor:
+ name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
+ self.saved_anchor = (name, href, add_date, last_visit, last_modified, comment + data)
+ data = '' # Used
+
+ if self.saved_folder:
+ name, add_date, comment = self.saved_folder
+ self.saved_folder = (name, add_date, comment + data)
+ data = '' # Used
+
+ self.flag_out = 0
+
+ if self.flag_out == 1:
+ self.flush()
+
+ if data and (data[0] <> '&') and (self.flag_out == 0):
+ self.flag_out = 1 # Set flag (to flush data on next call)
+
+ if data:
+ self.saved_data = self.saved_data + data
+
+
+ def anchor_bgn(self, href, add_date, last_visit, last_modified):
+ self.flush()
+ self.anchor = (href, add_date, last_visit, last_modified)
+
+
+ def anchor_end(self):
+ if self.anchor:
+ href, add_date, last_visit, last_modified = self.anchor
+ self.anchor = None
+ self.urls_no = self.urls_no + 1
+
+ self.saved_anchor = (self.saved_data, href, add_date, last_visit, last_modified, '')
+ self.saved_data = '' # Used
+
+
+ def start_a(self, attrs):
+ href = ''
+ add_date = ''
+ last_visit = ''
+ last_modified = ''
+
+ for attrname, value in attrs:
+ value = string.strip(value)
+ if attrname == 'href':
+ href = value
+ if attrname == 'add_date':
+ add_date = value
+ if attrname == 'last_visit':
+ last_visit = value
+ if attrname == 'last_modified':
+ last_modified = value
+
+ self.anchor_bgn(href, add_date, last_visit, last_modified)
+
+
+ def start_h3(self, attrs): # Navigator marks folders with <H3> tags
+ self.flush()
+ add_date = ''
+
+ for attrname, value in attrs:
+ value = string.strip(value)
+ if attrname == 'add_date':
+ add_date = value
+
+ self.saved_folder = ('', add_date, '')
+ self.flag_out = 0
+
+
+ def end_h3(self): # End of folder
+ name, add_date, comment = self.saved_folder
+ self.saved_folder = (name + self.saved_data, add_date, comment)
+ self.saved_data = '' # Used
+
+
+ def start_dl(self, attrs):
+ self.flush()
+
+ if not self.outfile: # We are starting output after 1st <DL> tag to skip header
+ self.open_outfile()
+
+ self.level = self.level + 1
+
+
+ def end_dl(self):
+ self.flush()
+ self.level = self.level - 1
+
+
+ def do_dd(self, attrs):
+ if self.outfile:
+ self.flag_out = 2 # Set flag to signal "comment starting"
+
+
+ def do_br(self, attrs):
+ if self.outfile:
+ self.saved_data = self.saved_data + "<BR>" # Add <BR>...
+ self.flag_out = 0 # ...and next line of comment to saved comment
+
+
+ def do_hr(self, attrs):
+ if self.outfile:
+ self.flush()
+ self.saved_ruler = 1
+
+
+ def handle_charref(self, name):
+ if self.outfile:
+ self.flag_out = 0
+ self.saved_data = "%s&%c" % (self.saved_data, chr(name))
+
+
+ def handle_entityref(self, name):
+ if self.outfile:
+ self.flag_out = 0
+ if self.entitydefs.has_key(name): # If it is one of the standard SGML entities - close it with semicolon
+ x = ';'
+ else:
+ x = ''
+ self.saved_data = "%s&%s%s" % (self.saved_data, name, x)
+
+
+ def open_outfile(self):
+ self.outfile = open("bookmarks.tmp", 'w')
+
+
class Bookmarks2Text(BookmarksParser):
    """Parser that dumps each record as an indented line of plain text."""

    def _indent(self):
        # One space per nesting level below the outermost <DL>.
        return " " * (self.level - 1)

    def flush_anchor(self):
        self.outfile.write("%s%s\n" % (self._indent(), str(self.saved_anchor)))

    def flush_folder(self):
        self.outfile.write("%s%s\n" % (self._indent(), str(self.saved_folder)))

    def flush_ruler(self):
        self.outfile.write(self._indent() + "----------\n")

    def __del__(self):
        # Commit the temporary output under its final name.
        shutil.copy("bookmarks.tmp", "bookmarks.txt")
        os.unlink("bookmarks.tmp")
+
+
class Bookmarks2Flad(BookmarksParser):
    """Parser that writes records as a FLAD (flat ASCII database) file."""

    def __init__(self, formatter, verbose=0):
        BookmarksParser.__init__(self, formatter, verbose)
        self.flush_record = 0 # set once the 1st record is out; records are blank-line separated

    def flush(self):
        """Write a blank-line separator before every record but the first,
        then let the base class emit the pending record.

        (Dead code removed: the original also initialized a local
        `record_flushed` that was never read in this override.)
        """
        if not self.outfile:
            return

        if self.saved_anchor or self.saved_folder or self.saved_ruler or self.saved_data:
            if self.flush_record:
                self.outfile.write('\n')
            else:
                self.flush_record = 1

        BookmarksParser.flush(self)

    def flush_anchor(self):
        name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
        self.outfile.write("""Level: %d
Title: %s
URL: %s
AddDate: %s
LastVisit: %s
LastModified: %s
Comment: %s
""" % (self.level, name, href, add_date, last_visit, last_modified, comment))

    def flush_folder(self):
        name, add_date, comment = self.saved_folder
        self.outfile.write("""Level: %d
Folder: %s
AddDate: %s
Comment: %s
""" % (self.level, name, add_date, comment))

    def flush_ruler(self):
        self.outfile.write("Level: %s\nRuler: YES\n" % self.level)

    def __del__(self):
        # Commit the temporary output under its final name.
        shutil.copy("bookmarks.tmp", "bookmarks.db")
        os.unlink("bookmarks.tmp")
+
+
class Bookmarks2Gadfly(BookmarksParser):
    """Parser that inserts records into a GadFly SQL database.

    self.outfile doubles as the gadfly cursor, so the inherited
    "has output started" checks keep working unchanged.
    """

    def open_outfile(self):
        # Create the database and its single table; keep the connection
        # for the final commit in __del__.
        import gadfly
        connection = gadfly.gadfly()
        connection.startup("bookmarks", ".")
        self.connection = connection

        cursor = connection.cursor()
        cursor.execute("""create table bookmarks (
            rec_no integer,
            level integer,
            title varchar,
            DATA varchar,
            add_date integer,
            last_visit integer,
            last_modified integer,
            comment varchar
        )""")
        self.outfile = cursor

        # Parameterized insert reused by all flush_* methods below.
        self.template = """insert into bookmarks
            (rec_no, level, title, DATA, add_date, last_visit, last_modified, comment)
            values (?, ?, ?, ?, ?, ?, ?, ?)"""

    def __del__(self):
        # Commit everything when the parser goes away.
        self.connection.commit()

    def flush_anchor(self):
        name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
        self.outfile.execute(self.template,
            (self.record_no, self.level, name, href,
             add_date, last_visit, last_modified, comment)
        )

    def flush_folder(self):
        # Folders carry the literal marker "Folder" in the DATA column.
        name, add_date, comment = self.saved_folder
        self.outfile.execute(self.template,
            (self.record_no, self.level, name, "Folder",
             add_date, '', '', comment)
        )

    def flush_ruler(self):
        # Rulers carry the literal marker "Ruler" and empty fields.
        self.outfile.execute(self.template,
            (self.record_no, self.level, '', "Ruler",
             '', '', '', '')
        )
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Test FLAD database for: duplicate URLs, too big indent, incorrect record
+ format, spare keys.
+
+ Written by BroytMann, Jun 1997 - Feb 2000. Copyright (C) 1997-2000 PhiloSoft Design
+"""
+
+import sys, string
+from getopt import getopt
+from copy import _copy_dict
+
+import fladm
+
+
def error(err_str):
    """Report one validation error to stderr (and logfile, when open).

    Increments the module-global errors_found counter; on the very first
    error (with stats enabled) finishes run()'s "Testing: " line with
    the words "errors found".
    """
    global errors_found, report_stats
    if errors_found == 0:
        if report_stats:
            print "errors found"

    errors_found = errors_found + 1
    sys.stderr.write("%s\n" % err_str)

    # NOTE(review): `logfile` is read from module scope; run() must have
    # initialized it (possibly to None) before the first call.
    if logfile:
        logfile.write("%s\n" % err_str)
+
+
def check_key(record_no, record, key, allow_empty=1):
    """Verify that `record` carries `key` (optionally non-empty) and
    consume the key so check_empty() can detect leftovers later."""
    if record.has_key(key):
        if not allow_empty and not record[key]:
            error("Empty key `%s' in record %d -- %s" % (key, record_no, str(record)))
        del record[key]
    else:
        error("Expected `%s' in record %d -- %s" % (key, record_no, str(record)))
+
def check_date(record_no, record, key):
    """Verify that record[key] exists and is a decimal timestamp, then
    consume the key.

    Bug fix: the original fell through to `del record[key]` even when
    the key was missing, raising KeyError right after reporting the
    error; now it returns early like check_key() does.
    """
    if not record.has_key(key):
        error("Expected `%s' in record %d -- %s" % (key, record_no, str(record)))
        return

    try:
        _date = string.atoi(record[key])
    except string.atoi_error:
        error("Bad `%s' format in record %d -- %s" % (key, record_no, str(record)))

    del record[key]
+
def check_empty(record_no, record):
    """After every expected key was consumed, nothing must remain."""
    if len(record) > 0:
        error("Spare keys in record %d -- %s" % (record_no, str(record)))
+
def check_url(record_no, record):
    """Validate an URL record: consume all expected keys, flag spares."""
    # I am not testing here check_url("Level") because it is impossible
    # to come here without "Level" key - fladm.check_record has to reject
    # entire database if there is record without this "must key".
    # If someone adds record without "Level" manually - it is serious error
    # and the following line raise exception.
    del record["Level"]

    check_key(record_no, record, "Title")
    check_key(record_no, record, "URL")
    check_key(record_no, record, "Comment")

    check_date(record_no, record, "AddDate")
    check_date(record_no, record, "LastVisit")
    check_date(record_no, record, "LastModified")

    check_empty(record_no, record)
+
def check_folder(record_no, record):
    """Validate a folder record: consume expected keys, flag spares."""
    # Read comment above - in the beginning of check_url()
    del record["Level"]

    check_key(record_no, record, "Folder")
    check_key(record_no, record, "Comment")

    check_date(record_no, record, "AddDate")
    check_empty(record_no, record)
+
def check_ruler(record_no, record):
    """Validate a ruler (separator) record."""
    # Read comment above - in the beginning of check_url()
    del record["Level"]

    if not record.has_key("Ruler"):
        error("No `Ruler' in record %d -- %s" % (record_no, str(record)))
    else:
        if record["Ruler"] <> "YES": # Impossible: ruler saying it is not ruler
            error("Ruler saying it is not ruler in record %d -- %s" % (record_no, str(record)))
        del record["Ruler"]

    check_empty(record_no, record)
+
+
def run():
    """Check bookmarks.db for duplicate URLs, bad indentation and
    malformed records.

    Options: -l FILE  also write errors to FILE
             -s       suppress statistics output
    """
    optlist, args = getopt(sys.argv[1:], "l:s")

    global errors_found, report_stats, logfile
    report_stats = 1

    logfile = None
    logfname = None

    for _opt, _arg in optlist:
        if _opt == '-l':
            logfname = _arg
        if _opt == '-s':
            report_stats = 0
    try:
        del _opt, _arg
    except NameError:
        pass

    if len(args) > 1:
        sys.stderr.write("check_db: too many arguments\n")
        sys.exit(1)

    if logfname:
        logfile = open(logfname, 'w')

    if report_stats:
        sys.stdout.write("Loading: ")
        sys.stdout.flush()

    bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])

    if report_stats:
        print "Ok"
        sys.stdout.write("Testing: ")
        sys.stdout.flush()

    record_no = 0
    save_level = 1
    got_folder = 1 # Start as if we already have one folder
    errors_found = 0

    URL_d = {} # Create hash table full of URLs

    for record in bookmarks_db:
        record_no = record_no + 1
        level = string.atoi(record["Level"])

        if record.has_key("URL"):
            if URL_d.has_key(record["URL"]):
                error("Duplicate URL (rec. %d, 1st at rec. %d): %s" % (record_no, URL_d[record["URL"]], str(record["URL"])))
            else:
                URL_d[record["URL"]] = record_no

            # NOTE(review): _copy_dict is a private helper of the `copy`
            # module; record.copy() would be the supported spelling -
            # verify the record type before changing.
            check_url(record_no, _copy_dict(record))

        elif record.has_key("Folder"):
            check_folder(record_no, _copy_dict(record))

        elif record.has_key("Ruler"):
            check_ruler(record_no, _copy_dict(record))

        else:
            raise KeyError, "neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record)

        # Indentation may only grow by 1, and only right after a folder.
        if got_folder:
            if (level > save_level + 1):
                error("Indent %d too big (want %d at rec. %d), record: %s" % (level, save_level, record_no, str(record)))
        else:
            if (level > save_level):
                error("Indent %d without folder (rec. %d), record: %s" % (level, record_no, str(record)))

        save_level = level
        got_folder = record.has_key("Folder") # Test here to save got_folder for next loop

    # End of loop

    if logfname:
        logfile.close()

    if report_stats:
        print record_no, "records tested"
        if errors_found == 0:
            print "Ok (no errors found)"
        else:
            print "%d errors found" % errors_found


if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Test FLAD database for old records
+
+ Written by BroytMann, Feb 2000. Copyright (C) 2000 PhiloSoft Design
+"""
+
+
+import fladm
+from time import time
+
now = time()          # reference point: moment the script was loaded
thrashold = 2*24*3600 # 2 days  (sic - "threshold")


def run():
    """Print URLs whose AddDate lies within the last two days."""
    bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])

    for record in bookmarks_db:
        if record.has_key("URL"):
            add_date = int(record["AddDate"])
            if now - add_date < thrashold:
                print "New URL:", record["URL"]


if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Test FLAD database for old records
+
+ Written by BroytMann, Feb 2000. Copyright (C) 2000 PhiloSoft Design
+"""
+
+
+import fladm
+from time import time
+
now = time()             # reference point: moment the script was loaded
thrashold = 2*30*24*3600 # 2 months  (sic - "threshold")


def run():
    """Print URLs whose LastVisit is more than two months ago."""
    bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])

    for record in bookmarks_db:
        if record.has_key("URL"):
            last_visit = int(record["LastVisit"])
            if now - last_visit > thrashold:
                print "Old URL:", record["URL"]


if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Check URL - subprocess
+
+ Written by BroytMann, Mar 1999 - Feb 2000. Copyright (C) 1999-2000 PhiloSoft Design
+"""
+
+
+import sys, os, stat, string, time
+import urllib, www_util
+
+import cPickle
+pickle = cPickle
+from subproc import RecordFile
+
+from md5wrapper import md5wrapper
+
+
ftpcache_key = None # cache key of the ftp connection opened last; see get_welcome()
def myftpwrapper(user, passwd, host, port, dirs):
    # Wrapper around urllib.ftpwrapper that remembers the cache key of
    # the connection being created, so get_welcome() can find it later.
    global ftpcache_key
    ftpcache_key = (user, host, port, string.joinfields(dirs, '/'))
    return _ftpwrapper(user, passwd, host, port, dirs)

_ftpwrapper = urllib.ftpwrapper # keep the original before monkey-patching
urllib.ftpwrapper = myftpwrapper
+
def get_welcome():
    """Return the welcome message of the ftp connection opened last."""
    global ftpcache_key
    welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome
    # Forget the key. I am assuming there are no duplicate ftp URLs in
    # db; with duplicates the key read above is already invalid.
    ftpcache_key = None
    return welcome
+
+
class RedirectException(Exception):
    """Raised when the server answers with an HTTP redirect status."""

    # HTTP status -> short relocation tag used in the message.
    reloc_dict = {301: "perm", 302: "temp"}

    def __init__(self, errcode, newurl):
        tag = self.reloc_dict[errcode]
        Exception.__init__(self, "(%s.) to %s" % (tag, newurl))
+
+
class MyURLopener(urllib.URLopener):
    """URLopener that raises on redirects and on authentication demands
    instead of following/asking silently."""

    # Error 302 -- relocated (temporarily)
    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            newurl = "Nowhere"
        raise RedirectException(errcode, newurl)

    # Error 301 -- also relocated (permanently)
    http_error_301 = http_error_302

    # Error 401 -- authentication required
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        raise IOError, ('http error', errcode, "Authentication required ", headers)
+
+
def get_error(msg):
    """Format an IOError argument as a one-line string.

    Strings pass through unchanged; tuples/exceptions come back as a
    parenthesized list of quoted parts with newlines escaped.
    """
    if type(msg) == type(""):
        return msg

    quoted = []
    for part in msg:
        escaped = string.join(string.split(str(part), "\n"), "\\n")
        quoted.append("'%s'" % escaped)
    return "(%s)" % string.join(quoted)
+
def check_url(record):
    """Download record["URL"] into record["TEMPFILE"] and fill in
    Size/LastModified/MD5 - or Error/NoError/Moved on failure.
    Always stamps LastTested, even after an error."""
    try:
        now = str(int(time.time()))
        url_type, url_rest = urllib.splittype(record["URL"])
        url_host, url_path = urllib.splithost(url_rest)
        url_path, url_tag = urllib.splittag(url_path)

        # The parent passes the scratch-file name inside the record.
        tempfname = record["TEMPFILE"]
        del record["TEMPFILE"]

        fname, headers = urllib.urlretrieve("%s://%s%s" % (url_type, url_host, url_path), tempfname)

        last_modified = None
        record["Size"] = str(os.stat(tempfname)[stat.ST_SIZE])

        if headers:
            try:
                last_modified = headers["Last-Modified"]
            except KeyError:
                last_modified = None

            if last_modified:
                last_modified = www_util.parse_time(last_modified)

        if last_modified:
            last_modified = str(int(last_modified))
        else:
            # No usable header - fall back to the stored LastVisit.
            last_modified = record["LastVisit"]

        record["LastModified"] = last_modified

        md5 = md5wrapper()
        if url_type == "ftp": # Pass welcome message through MD5
            md5.update(get_welcome())

        md5.md5file(tempfname)
        record["MD5"] = str(md5)

    except IOError, msg:
        if (msg[0] == "http error") and (msg[1] == -1):
            record["NoError"] = "The server did not return any header - it is not an error, actually"
        else:
            record["Error"] = get_error(msg)

    except EOFError:
        record["Error"] = "Unexpected EOF (FTP server closed connection)"

    except RedirectException, msg:
        record["Moved"] = str(msg)

    # Mark this even in case of error
    record["LastTested"] = now
+
+
def run():
    """Subprocess main loop: read pickled records from stdin, check each
    URL, write the updated pickled record back to stdout."""
    urllib._urlopener = MyURLopener()

    # Some sites allow only Mozilla-compatible browsers; way to stop robots?
    server_version = "Mozilla/3.0 (compatible; Python-urllib/%s)" % urllib.__version__
    urllib._urlopener.addheaders[0] = ('User-agent', server_version)

    rec_in = RecordFile(sys.stdin)
    rec_out = RecordFile(sys.stdout)

    # NOTE(review): no explicit exit condition - presumably the loop ends
    # when the parent closes the pipe and read_record() raises; confirm
    # against subproc.RecordFile.
    while 1:
        record = pickle.loads(rec_in.read_record())
        check_url(record)
        rec_out.write_record(pickle.dumps(record))


if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ For every URL in the FLAD database get info from the Net
+ and store info in check.db
+
+ Version 2.0
+ Written by BroytMann, Aug 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design
+"""
+
+
+import sys, os, stat, string, time
+from getopt import getopt
+
+import urllib, tempfile
+from copy import _copy_dict
+
+import cPickle
+pickle = cPickle
+
+import fladm, fladc, shutil
+from flog import makelog, openlog
+
+
+os.environ["PATH"] = ".:" + os.environ["PATH"]
+from subproc import Subprocess, RecordFile
+
+
def set_checkpoint(rec_no):
    """Write check.dat: progress record number plus the size/mtime
    fingerprint of the database (module-global db_stat)."""
    cpfile = open("check.dat", 'w')
    cpfile.write("# chk_urls checkpoint file\n"
                 "Size: %d\n"
                 "MTime: %d\n"
                 "Record: %d" % (db_stat[stat.ST_SIZE],
                                 db_stat[stat.ST_MTIME],
                                 rec_no))
    cpfile.close()
+
+def get_checkpoint():
+ try:
+ cpfile = fladc.load_file("check.dat")
+ if (string.atoi(cpfile["Size"]) <> db_stat[stat.ST_SIZE]) or \
+ (string.atoi(cpfile["MTime"]) <> db_stat[stat.ST_MTIME]):
+ return -3
+
+ return string.atoi(cpfile["Record"])
+
+ except IOError: # No such file
+ return -1
+
+ except KeyError: # No such key in checkpoint file
+ return -2
+
+ except string.atoi_error: # Wrong numeric format
+ return -2
+
+ return 0
+
def start(db_name, report_stats):
    """Decide where to (re)start checking from, based on the checkpoint.

    Returns (start_recno, db_name, log); db_name switches to "check.db"
    when a valid checkpoint lets us continue a previous run.
    """
    start_recno = get_checkpoint()
    if start_recno < 0:
        if start_recno == -1: # no checkpoint file - fresh start
            log = makelog("check.log")
            log("chk_urls started")
            if report_stats:
                print " chk_urls: normal start"

        elif start_recno == -2: # malformed checkpoint file
            log = openlog("check.log")
            log("chk_urls started")
            log(" invalid checkpoint file, checkpoint ignored")
            if report_stats:
                print " chk_urls: invalid checkpoint file, checkpoint ignored"

        elif start_recno == -3: # bookmarks.db changed under us
            log = makelog("check.log")
            log("chk_urls started")
            log(" bookmarks.db changed, checkpoint ignored")
            if report_stats:
                print " chk_urls: bookmarks.db changed, checkpoint ignored"

        else:
            raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)

        start_recno = 0

    elif start_recno == 0: # get_checkpoint() never returns 0
        raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)

    else: # start_recno > 0
        if os.path.exists("check.db"):
            # Preserve the previous results before continuing over them.
            if not os.path.exists("check.old"):
                shutil.copy("check.db", "check.old")
            db_name = "check.db"

            log = openlog("check.log")
            log("chk_urls started")
            log(" found valid checkpoint file, continue")
            if report_stats:
                print " chk_urls: found valid checkpoint file, continue"

        else:
            log = makelog("check.log")
            log("chk_urls started")
            log(" valid checkpoint, but no check.db file, restarting")
            if report_stats:
                print " chk_urls: valid checkpoint, but no check.db file, restarting"
            start_recno = 0

    return start_recno, db_name, log
+
+
tempfname = "check_urls" + tempfile.gettempprefix() + ".tmp" # scratch file for downloads


check_subp = None # Subprocess running check_url_sub.py
subp_pipe = None  # RecordFile wrapped around its pipe

def restart_subp(log, report_stats):
    """(Re)start the URL-checking subprocess and its record pipe."""
    global check_subp, subp_pipe
    if check_subp:
        log(" restarting hanging subprocess")
        if report_stats:
            print " chk_urls: restarting hanging subprocess"
        # Drop the old objects first so their destructors run now.
        del check_subp
        del subp_pipe

    check_subp = Subprocess("check_url_sub.py")
    subp_pipe = RecordFile(check_subp)
+
+
def check_url(record, log, report_stats):
    """Ship `record` to the subprocess and merge the reply back into it.

    Returns 0 when interrupted by the user (^C), 1 otherwise.
    """
    try:
        record["TEMPFILE"] = tempfname
        subp_pipe.write_record(pickle.dumps(record))

        if check_subp.waitForPendingChar(900): # wait 15 minutes
            rec = pickle.loads(subp_pipe.read_record())
            del record["TEMPFILE"]
            # Copy every field the subprocess set (Size, MD5, Error, ...).
            for key in rec.keys():
                record[key] = rec[key]
        else:
            # Subprocess hung - replace it and mark the record.
            restart_subp(log, report_stats)
            del record["TEMPFILE"]
            record["Error"] = "Subprocess connection timed out"

    except KeyboardInterrupt:
        return 0

    return 1
+
+
def run():
    """Check every URL in the database through the subprocess, with
    checkpoint/restart support.

    Options: -i  inhibit progress bar
             -s  suppress statistics
             -e  recheck only records that previously got an Error
    """
    optlist, args = getopt(sys.argv[1:], "ise")

    show_pbar = 1
    report_stats = 1
    only_errors = 0
    db_name = "bookmarks.db"

    for _opt, _arg in optlist:
        if _opt == '-i':
            show_pbar = 0
        if _opt == '-s':
            report_stats = 0
        if _opt == '-e':
            only_errors = 1
    try:
        del _opt, _arg
    except NameError:
        pass

    if report_stats:
        print "BroytMann check_urls, Copyright (C) 1997-1999 PhiloSoft Design"

    if args:
        if len(args) > 1:
            sys.stderr.write("chk_urls: too many arguments\n")
            sys.exit(1)
        else:
            db_name = args[0]

    if show_pbar:
        show_pbar = sys.stderr.isatty()

    if show_pbar:
        try:
            from tty_pbar import ttyProgressBar
        except ImportError:
            show_pbar = 0

    global db_stat, log
    db_stat = os.stat(db_name)

    if only_errors:
        start_recno = 0
        db_name = "check.db"
        log = openlog("check.log")
        log("chk_urls restarted for errors")
    else:
        start_recno, db_name, log = start(db_name, report_stats)

    if report_stats:
        sys.stdout.write("Loading %s: " % db_name)
        sys.stdout.flush()

    bookmarks_db = fladm.load_from_file(db_name, fladm.check_record, ["Level"])
    # Keep a reference to the FULL database: store_to_file() must write
    # everything even when the working list is filtered below.
    bookmarks_dbstore = bookmarks_db

    if only_errors:
        bookmarks_db = filter(lambda r: r.has_key("Error") and r["Error"][:5] <> "Moved", bookmarks_db)

    if report_stats:
        print "Ok"

    db_len = len(bookmarks_db)
    if db_len == 0:
        print "Database empty"
        sys.exit(0)

    if start_recno >= db_len:
        _s = "start_recno (%d) >= db_len (%d), restarting" % (start_recno, db_len)
        log(" " + _s)
        if report_stats:
            print " chk_urls: " + _s
        del _s
        start_recno = 0

    if report_stats:
        if only_errors:
            s = "Rechecking errors: "
        else:
            s = "Checking: "
        sys.stdout.write(s)
        sys.stdout.flush()

    if show_pbar:
        # Silence per-record reports while the bar owns the terminal.
        save_stats = report_stats
        report_stats = 0
        pbar = ttyProgressBar(0, db_len)

    urls_no = 0
    record_count = 0
    start_time = time.time()

    rcode = 1
    restart_subp(log, report_stats) # Not restart, just start afresh
    checked_dict = {} # Dictionary of checked URLs, mapped to records number

    for record_no in range(start_recno, db_len):
        if show_pbar:
            pbar.display(record_no+1)

        record = bookmarks_db[record_no]
        record_count = record_count + 1

        if only_errors:
            del record["Error"]

        if record.has_key("URL"):
            url = record["URL"]
            if checked_dict.has_key(url):
                # Duplicate URL: reuse the earlier result, but keep this
                # record's own Level and Comment.
                log("Already checked %s" % url)
                level = record["Level"]
                comment = record["Comment"]
                bookmarks_db[record_no] = _copy_dict(bookmarks_db[checked_dict[url]])
                bookmarks_db[record_no]["Level"] = level
                bookmarks_db[record_no]["Comment"] = comment
            else:
                log("Checking %s" % url)
                rcode = check_url(record, log, report_stats)
                if rcode:
                    current_time = time.time()
                    if current_time - start_time >= 300: # Save checkpoint and database every 5 min
                        bookmarks_dbstore.store_to_file("check.db")
                        set_checkpoint(record_no)
                        log.flush()
                        start_time = current_time
                    urls_no = urls_no + 1
                    checked_dict[url] = record_no
                else:
                    log("Interrupted by user (^C)")
                    break

    if show_pbar:
        del pbar
        report_stats = save_stats

    if report_stats:
        print "Ok"
        print record_count, "records checked"
        print urls_no, "URLs checked"

    bookmarks_dbstore.store_to_file("check.db")

    if rcode:
        log("chk_urls finished ok")
    log.close()

    urllib.urlcleanup()
    if os.path.exists(tempfname):
        os.unlink(tempfname)

    if rcode:
        # Finished cleanly - the checkpoint is no longer needed.
        if os.path.exists("check.dat"):
            os.unlink("check.dat")
    else:
        set_checkpoint(record_no)
        sys.exit(1)


if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ For every URL in the FLAD database get info from the Net
+ and store info in check.db
+
+ Written by BroytMann, Aug-Oct 1997. Copyright (C) 1997 PhiloSoft Design
+"""
+
+import sys, os, string, stat, shutil, time
+from getopt import getopt
+import tempfile
+
+import urllib
+from urllib import URLopener, splittype
+
+from md5wrapper import md5wrapper
+from flog import makelog, openlog
+import fladm, fladc, www_util
+
+
# Shortcut for basic usage
_urlopener = None # lazily created URLopener shared by the helpers below

def urlopen(url):
    """urllib.urlopen work-alike using the module-private opener."""
    global _urlopener
    if not _urlopener:
        _urlopener = URLopener()
    return _urlopener.open(url)

def urlretrieve(url, filename=None):
    """urllib.urlretrieve work-alike using the module-private opener."""
    global _urlopener
    if not _urlopener:
        _urlopener = URLopener()
    if filename:
        return _urlopener.retrieve(url, filename)
    else:
        return _urlopener.retrieve(url)

def urlcleanup():
    """Drop the opener's cache (temporary files etc.), if any exists."""
    if _urlopener:
        _urlopener.cleanup()
+
+
_key = None # cache key of the ftp connection opened last; see get_welcome()

def myftpwrapper(user, passwd, host, port, dirs):
    # Wrapper around urllib.ftpwrapper that remembers the cache key of
    # the connection being created, so get_welcome() can find it later.
    global _key
    _key = (user, host, port, string.joinfields(dirs, '/'))
    return _ftpwrapper(user, passwd, host, port, dirs)

_ftpwrapper = urllib.ftpwrapper # keep the original before monkey-patching
urllib.ftpwrapper = myftpwrapper

def get_welcome():
    """Return the welcome message of the ftp connection opened last."""
    global _key
    _welcome = _urlopener.ftpcache[_key].ftp.welcome
    _key = None # I am assuming there are no duplicate ftp URLs in db. If there are - _key in prev line is invalid
    return _welcome
+
+
def set_checkpoint(rec_no):
    """Write check.dat: progress record number plus the size/mtime
    fingerprint of the database (module-global db_stat)."""
    cpfile = open("check.dat", 'w')
    cpfile.write("# chk_urls checkpoint file\n"
                 "Size: %d\n"
                 "MTime: %d\n"
                 "Record: %d" % (db_stat[stat.ST_SIZE],
                                 db_stat[stat.ST_MTIME],
                                 rec_no))
    cpfile.close()
+
+def get_checkpoint():
+ try:
+ cpfile = fladc.load_file("check.dat")
+ if (string.atoi(cpfile["Size"]) <> db_stat[stat.ST_SIZE]) or \
+ (string.atoi(cpfile["MTime"]) <> db_stat[stat.ST_MTIME]):
+ return -3
+
+ return string.atoi(cpfile["Record"])
+
+ except IOError: # No such file
+ return -1
+
+ except KeyError: # No such key in checkpoint file
+ return -2
+
+ except string.atoi_error: # Wrong numeric format
+ return -2
+
+ return 0
+
+
+tempfname = tempfile.gettempprefix() + "check.tmp"
+
+
def get_error(msg):
    """Format an IOError argument as a one-line string.

    Strings pass through unchanged; tuples/exceptions come back as a
    parenthesized, comma-separated list of quoted parts with newlines
    escaped.
    """
    if type(msg) == type(""):
        return msg

    parts = []
    for item in msg:
        escaped = string.join(string.split(str(item), "\n"), "\\n")
        parts.append("'%s'" % escaped)
    return "(" + string.join(parts, ", ") + ")"
+
def check_url(record, url_type, url_rest):
    """Download the URL and fill in Size/LastModified/MD5, or Error on
    failure; always stamps LastTested. Returns 0 on ^C, 1 otherwise."""

    now = str(int(time.time()))

    try:
        fname, headers = urlretrieve(url_type + ':' + url_rest, tempfname)

        last_modified = None

        record["Size"] = str(os.stat(tempfname)[stat.ST_SIZE])

        if headers:
            try:
                last_modified = headers["Last-Modified"]
            except KeyError:
                last_modified = None

            if last_modified:
                last_modified = www_util.parse_time(last_modified)

        if last_modified:
            last_modified = str(int(last_modified))
        else:
            # No usable header - fall back to the stored LastVisit.
            last_modified = record["LastVisit"]

        record["LastModified"] = last_modified

        md5 = md5wrapper()
        if url_type == "ftp": # Pass welcome message through MD5
            md5.update(get_welcome())

        md5.md5file(tempfname)
        record["MD5"] = str(md5)

    except IOError, msg:
        record["Error"] = get_error(msg)

    except EOFError:
        record["Error"] = "Unexpected EOF (FTP server closed connection)"

    except KeyboardInterrupt:
        return 0

    # Mark this even in case of error
    record["LastTested"] = now

    return 1
+
+
def run():
    """Check every URL in bookmarks.db and store the results in check.db.

    Command line options:
        -i  inhibit the progress bar
        -s  suppress statistics / informational messages

    Supports checkpoint/restart: get_checkpoint() reports whether a valid
    checkpoint exists; the database and checkpoint are saved every 5
    minutes so an interrupted run (^C) can be continued later.  Exits
    with status 1 when interrupted by the user.

    NOTE(review): indentation in this listing was mangled; block structure
    has been reconstructed and should be confirmed against the original.
    """
    optlist, args = getopt(sys.argv[1:], "is")

    show_pbar = 1
    report_stats = 1
    db_name = "bookmarks.db"

    for _opt, _arg in optlist:
        if _opt == '-i':
            show_pbar = 0
        if _opt == '-s':
            report_stats = 0
    # Remove the loop variables; NameError means optlist was empty and
    # they were never bound.
    try:
        del _opt, _arg
    except NameError:
        pass

    if report_stats:
        print "BroytMann chk_urls, Copyright (C) 1997-1998 PhiloSoft Design"

    # chk_urls takes no positional arguments at all.
    if args:
        sys.stderr.write("chk_urls: too many arguments\n")
        sys.exit(1)

    # Show the bar only on an interactive terminal...
    if show_pbar:
        show_pbar = sys.stderr.isatty()

    # ...and only if the optional tty_pbar module is available.
    if show_pbar:
        try:
            from tty_pbar import ttyProgressBar
        except ImportError:
            show_pbar = 0

    global db_stat, log
    # Used (presumably by the checkpoint code) to detect that
    # bookmarks.db changed since the checkpoint was written.
    db_stat = os.stat("bookmarks.db")

    # Negative return values encode "no usable checkpoint" reasons;
    # positive means "continue from that record number".
    start_recno = get_checkpoint()
    if start_recno < 0:
        if start_recno == -1:
            # Normal start: no checkpoint file at all.
            log = makelog("check.log")
            log("chk_urls started")
            if report_stats:
                print " chk_urls: normal start"

        elif start_recno == -2:
            # Checkpoint file exists but is unparsable.
            log = openlog("check.log")
            log("chk_urls started")
            log(" invalid checkpoint file, checkpoint ignored")
            if report_stats:
                print " chk_urls: invalid checkpoint file, checkpoint ignored"

        elif start_recno == -3:
            # Database changed since the checkpoint was taken.
            log = makelog("check.log")
            log("chk_urls started")
            log(" bookmarks.db changed, checkpoint ignored")
            if report_stats:
                print " chk_urls: bookmarks.db changed, checkpoint ignored"

        else:
            raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)

        start_recno = 0

    elif start_recno == 0:
        # 0 is not a legal checkpoint value (records are counted from 0,
        # but a checkpoint at record 0 is meaningless).
        raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)

    else: # start_recno > 0
        if os.path.exists("check.db"):
            # Preserve the previous results once, then continue the
            # interrupted run against check.db itself.
            if not os.path.exists("check.old"):
                shutil.copy("check.db", "check.old")
            db_name = "check.db"

            log = openlog("check.log")
            log("chk_urls started")
            log(" found valid checkpoint file, continue")
            if report_stats:
                print " chk_urls: found valid checkpoint file, continue"

        else:
            # Checkpoint without its database: cannot continue, restart.
            log = makelog("check.log")
            log("chk_urls started")
            log(" valid checkpoint, but no check.db file, restarting")
            if report_stats:
                print " chk_urls: valid checkpoint, but no check.db file, restarting"
            start_recno = 0

    if report_stats:
        sys.stdout.write("Loading %s: " % db_name)
        sys.stdout.flush()

    bookmarks_db = fladm.load_from_file(db_name, fladm.check_record, ["Level"])
    db_len = len(bookmarks_db)

    if report_stats:
        print "Ok"

    # A checkpoint beyond the end of the database is stale - restart.
    if start_recno >= db_len:
        _s = "start_recno (%d) >= db_len (%d), restarting" % (start_recno, db_len)
        log(" " + _s)
        if report_stats:
            print " chk_urls: " + _s
        del _s
        start_recno = 0

    if report_stats:
        sys.stdout.write("Checking: ")
        sys.stdout.flush()

    if show_pbar:
        pbar = ttyProgressBar(0, db_len)

    urls_no = 0
    record_count = 0
    start_time = time.time()

    # rcode stays 1 while check_url() succeeds; 0 means user interrupt.
    rcode = 1
    for record_no in range(start_recno, db_len):
        if show_pbar:
            pbar.display(record_no+1)

        record = bookmarks_db[record_no]
        record_count = record_count + 1

        # Only URL records are checked; folders/rulers are just counted.
        if record.has_key("URL"):
            url_type, url_rest = splittype(record["URL"])
            log("Checking %s:%s" % (url_type, url_rest))
            rcode = check_url(record, url_type, url_rest)
            if rcode:
                current_time = time.time()
                if current_time - start_time >= 300: # Save checkpoint and database every 5 min
                    bookmarks_db.store_to_file("check.db")
                    set_checkpoint(record_no)
                    log.flush()
                    start_time = current_time
                urls_no = urls_no + 1
            else:
                log("Interrupted by user (^C)")
                break

    if show_pbar:
        del pbar

    if report_stats:
        print "Ok"
        print record_count, "records checked"
        print urls_no, "URLs checked"

    # Always persist whatever was checked, even after an interrupt.
    bookmarks_db.store_to_file("check.db")

    if rcode:
        log("chk_urls finished ok")
        # NOTE(review): in the mangled original it is unclear whether
        # log.close() was conditional; as reconstructed, the log stays
        # open on the interrupt path below - confirm against the original.
        log.close()

    # Drop urllib's cache and the temporary download file, if any.
    urlcleanup()
    if os.path.exists(tempfname):
        os.unlink(tempfname)

    if rcode:
        # Clean finish: the checkpoint file is no longer needed.
        if os.path.exists("check.dat"):
            os.unlink("check.dat")
    else:
        # Interrupted: remember where to restart and signal failure.
        set_checkpoint(record_no)
        sys.exit(1)
+
+
# Run the URL checker only when executed as a script, not on import.
if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Test FLAD database for old records
+
+ Written by BroytMann, Feb 2000. Copyright (C) 2000 PhiloSoft Design
+"""
+
+
+import fladm
+
+
def run():
    """Collect every bookmark record carrying an "Error" key into errors.db."""
    source_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])
    failed = fladm.Flad_WithMustKeys(fladm.check_record, ["Level"])

    # Keep only the records that the robot marked with an error.
    for entry in filter(lambda r: r.has_key("Error"), source_db):
        failed.append(entry)

    failed.store_to_file("errors.db")
+
+
# Script entry point: extract error records when run directly.
if __name__ == '__main__':
    run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Convert FLAD database back to bookmarks.html suitable for Netscape Navigator
+
+ Written by BroytMann, Jun 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design
+"""
+
+import sys, os, string, shutil
+from getopt import getopt
+
+import fladm
+
+
def write(s):
    """Write s to the full (private) HTML file, and to the public file
    only while outside the private folder (private_level == 0).

    Uses module globals private_level, public_html, private_html set up
    by gen_html().
    """
    # Fix: the parameter was named `str`, shadowing the builtin; renamed
    # to `s` (every call in this file passes it positionally).
    if private_level == 0: # Put in public all except private folder
        public_html.write(s)
    private_html.write(s)
+
+
def unindent(old_level, new_level):
    """Emit closing </DL> tags from old_level down to new_level.

    Tags are written deepest-first, each indented one space per level.
    """
    for lev in range(old_level - 1, new_level - 1, -1):
        write(" " * lev + "</DL><p>\n")
+
+
def gen_html(bookmarks_db, show_pbar, report_stats):
    """Generate public.html and private.html from the FLAD bookmarks database.

    Both files start as copies of the "header" file and are appended to
    through write(), which hides the private folder from public.html.
    Record counters and file handles are kept in module globals so that
    write() and the caller can see them.

    Raises ValueError on inconsistent indentation levels and KeyError on
    a record that is neither URL, nor Folder, nor Ruler.
    """
    global pbar, record_no, urls_no, public_html, private_html, private_level

    shutil.copy("header", "public.html")
    shutil.copy("header", "private.html")

    # Append after the common header copied above.
    public_html = open("public.html", 'a')
    private_html = open("private.html", 'a')

    record_no = 0
    urls_no = 0

    save_level = 0
    got_folder = 1 # Start as if we already have one folder
    private_level = 0

    for record in bookmarks_db:
        record_no = record_no + 1

        if show_pbar:
            pbar.display(record_no)

        level = string.atoi(record["Level"])

        # The level may stay, go one step deeper (only right after a
        # folder), or come back up any number of steps.
        if level == save_level:
            pass
        elif level == save_level + 1:
            if got_folder:
                write(" "*(level - 1) + "<DL><p>\n")
            else:
                raise ValueError, "indent without folder"
        elif level <= save_level - 1:
            unindent(save_level, level)
        else:
            raise ValueError, "new level (%d) too big; must be %d - %d" % (level, save_level-1, save_level+1)

        save_level = level
        got_folder = record.has_key("Folder") # Test here to save got_folder for next loop

        if private_level == save_level:
            private_level = 0 # We've returned to saved private level - private folder is over

        if record.has_key("URL"):
            write(" "*level + '<DT><A HREF="%s" ADD_DATE="%s" LAST_VISIT="%s" LAST_MODIFIED="%s">%s</A>\n' % (record["URL"], record["AddDate"], record["LastVisit"], record["LastModified"], record["Title"]))
            urls_no = urls_no + 1

        elif record.has_key("Folder"):
            # Dirty hacks here: folder names are compared against fixed
            # strings specific to the author's bookmarks file.
            if (record["Folder"] == "Private links") and (private_level == 0):
                private_level = save_level # We found private folder - save its level

            if record["Folder"] == "All the rest - Unclassified":
                write(" "*level + '<DT><H3 NEWITEMHEADER ADD_DATE="%s">%s</H3>\n' % (record["AddDate"], record["Folder"]))
            else:
                write(" "*level + '<DT><H3 ADD_DATE="%s">%s</H3>\n' % (record["AddDate"], record["Folder"]))

        elif record.has_key("Ruler"):
            write(" "*level + "<HR>\n")

        else:
            raise KeyError, "neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record)

        # Comments were flattened to one line with <BR> separators when
        # the database was built; re-expand them here.
        if record.has_key("Comment") and (record["Comment"] <> ''):
            write("<DD>%s\n" % string.join(string.split(record["Comment"], "<BR>"), "<BR>\n"))


    # Close any lists still open at the end of the database.
    if save_level >= 0:
        unindent(save_level, 0)
    else:
        raise ValueError, "new level (%d) too little - must be >= 0" % save_level

    public_html.close()
    private_html.close()

    if show_pbar:
        del pbar

    if report_stats:
        print "Ok"
+
+
+def translate(bookmarks_db, transldb_name, transl, show_pbar, report_stats):
+ global pbar, record_no, urls_no, public_html, private_html, private_level
+
+ new_ext = str(transl)
+ os.rename("public.html", "public." + new_ext)
+ os.rename("private.html", "private." + new_ext)
+
+ transl_d = {}
+ transl_db = fladm.load_from_file(transldb_name, fladm.check_record, ["URL1", "URL2"], [""])
+ # This prevents any other key to appear in transl.db ^
+
+ # Generate translation dictionary (hash table)
+ if transl == 1:
+ for record in transl_db:
+ transl_d[record["URL1"]] = record["URL2"]
+ elif transl == 2:
+ for record in transl_db:
+ transl_d[record["URL2"]] = record["URL1"]
+ else:
+ raise ValueError, "transl (%d) must be 1 or 2" % transl
+
+ del transl_db # Save few bytes of memory
+ transl_k = transl_d.keys()
+
+ # Translate URLs
+ for record in bookmarks_db:
+ if record.has_key("URL") and (record["URL"] in transl_k):
+ record["URL"] = transl_d[record["URL"]]
+
+ gen_html(bookmarks_db, show_pbar, report_stats)
+
+ new_ext = str(3 - transl) # Translate 1 to 2, or 2 to 1
+ os.rename("public.html", "public." + new_ext)
+ os.rename("private.html", "private." + new_ext)
+
+
def run():
    """Convert bookmarks.db back to Netscape bookmarks HTML files.

    Command line options:
        -i          inhibit the progress bar
        -s          suppress statistics output
        -t dict.db  translate URLs through the given dictionary database
        -r          reverse the translation direction (URL2 -> URL1)
    """
    global pbar, record_no, urls_no, public_html, private_html, private_level

    optlist, args = getopt(sys.argv[1:], "ist:r")

    show_pbar = 1
    report_stats = 1

    transldb_name = "" # dictionary translation; default is no translation
    transl = 0

    for _opt, _arg in optlist:
        if _opt == '-i':
            show_pbar = 0
        if _opt == '-s':
            report_stats = 0
        if _opt == '-t':
            transldb_name = _arg
            transl = 1
        if _opt == '-r':
            transl = 2
    # Remove the loop variables; NameError means optlist was empty.
    try:
        del _opt, _arg
    except NameError:
        pass

    # db2bkmk takes no positional arguments.
    if args:
        sys.stderr.write("db2bkmk: too many arguments\n")
        sys.exit(1)

    # Progress bar only on an interactive terminal and only when the
    # optional tty_pbar module is importable.
    if show_pbar:
        show_pbar = sys.stderr.isatty()

    if show_pbar:
        try:
            from tty_pbar import ttyProgressBar
        except ImportError:
            show_pbar = 0

    if report_stats:
        sys.stdout.write("Loading: ")
        sys.stdout.flush()

    bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])

    if report_stats:
        print "Ok"
        sys.stdout.write("Converting FLAD database to bookmarks.html: ")
        sys.stdout.flush()

    if show_pbar:
        pbar = ttyProgressBar(0, len(bookmarks_db))

    gen_html(bookmarks_db, show_pbar, report_stats)

    if transl:
        if report_stats:
            sys.stdout.write("Translating: ")
            sys.stdout.flush()

        # Display bar only without "-i"; with "-s" skip it (one bar
        # already displayed, and it is enough).
        if report_stats and show_pbar:
            pbar = ttyProgressBar(0, len(bookmarks_db))

        else:
            show_pbar = 0

        translate(bookmarks_db, transldb_name, transl, show_pbar, report_stats)


    if report_stats:
        print record_no, "records proceed"
        print urls_no, "urls created"
+
+
# Script entry point: rebuild the HTML files when run directly.
if __name__ == '__main__':
    run()
--- /dev/null
+URL1: http://www.xland.ru:8088/tel_koi/owa/tel.intro
+URL2: http://www.xland.ru:8088/tel_win/owa/tel.intro
+
+URL1: http://meteo.infospace.ru/koi/moscow/html/r_index.htm
+URL2: http://meteo.infospace.ru/win/moscow/html/r_index.htm
+
+URL1: http://meteo.infospace.ru/koi/wcond/html/r_index.ssi
+URL2: http://meteo.infospace.ru/win/wcond/html/r_index.ssi
+
+URL1: http://koi.dzik.aha.ru/
+URL2: http://www.dzik.aha.ru/
+
+URL1: http://www-psb.ad-sbras.nsc.ru/kruglk.htm
+URL2: http://www-psb.ad-sbras.nsc.ru/kruglw.htm
--- /dev/null
+
+ BOOKMARKS database and internet robot
+
+ Here is a set of classes, libraries and programs I use to manipulate my
+bookmarks.html. I like Netscape Navigator, but I need more features, so I am
+writing these programs for my needs. I need to extend Navigator's "What's new"
+feature (Navigator 4 named it "Update bookmarks").
+
+ These programs are intended to run as follows.
+1. bkmk2db converts bookmarks.html to bookmarks.db.
+2. chk_urls (Internet robot) runs against bookmarks.db, checks every URL and
+ saves results in check.db.
+3. db2bkmk converts bookmarks.db back to bookmarks.html.
+ Then I use this bookmarks file and...
+4. bkmk2db converts bookmarks.html to bookmarks.db.
+5. chk_urls (Internet robot) runs against bookmarks.db, checks every URL and
+ saves results in check.db (old file copied to check.old).
+6. (A yet-unnamed program) will compare check.old with check.db and generate
+a detailed report. For example:
+ this URL is unchanged
+ this URL is changed
+ this URL is unavailable due to: host not found...
+
+ Bookmarks database programs are almost debugged. What needs to be done is
+support for aliases. The second version of the internet robot is finished.
+
+ Although not required, these programs work fine with tty_pbar.py (my little
+module for creating text-mode progress bars).
+
+COPYRIGHT and LEGAL ISSUES
+ All programs copyrighted by Oleg Broytmann and PhiloSoft Design. All
+sources protected by GNU GPL. Programs are provided "as-is", without any kind
+of warranty. All usual blah-blah-blah.
+
+ #include <disclaimer>
+
+
+------------------------------ bkmk2db ------------------------------
+ NAME
+ bkmk2db.py - script to convert bookmarks.html to FLAD database.
+
+ SYNOPSIS
+ bkmk2db.py [-its] [/path/to/bookmarks.html]
+
+ DESCRIPTION
+ bkmk2db.py splits given file (or ./bookmarks.html) into FLAD database
+ bookmarks.db in current directory.
+
+ Options:
+ -i
+ Inhibit progress bar. Default is to display progress bar if
+ stderr.isatty()
+
+ -t
+ Convert to text file (for debugging). Default is to convert to
+ FLAD.
+
+ -s
+ Suppress output of statistics at the end of the program. Default
+ is to write how many lines the program read and how many URLs
+ parsed. Also suppress some messages during run.
+
+ BUGS
+ The program starts working by writing lines to the header file until
+ BookmarksParser initializes its own output file (this occurs when the
+ parser encounters the first <DL> tag). It is a misdesign.
+
+ Empty comments (no text after <DD>) are not marked specially in
+ database, so db2bkmk.py will not reconstruct it. I don't need empty
+ <DD>s, so I consider it as feature, not a real bug.
+
+ Aliases are not supported (yet).
+
+
+------------------------------ db2bkmk ------------------------------
+ NAME
+ db2bkmk.py - script to reconstruct bookmarks.html back from FLAD
+ database.
+
+ SYNOPSIS
+ db2bkmk.py [-is] [-t dict.db [-r]]
+
+ DESCRIPTION
+ db2bkmk.py reads bookmarks.db and creates two HTML files -
+ public.html and private.html. The latter is just full
+ bookmarks.html, while the former file hides private folder.
+
+ Options:
+ -i
+ Inhibit progress bar. Default is to display progress bar if
+ stderr.isatty()
+
+ -s
+ Suppress output of statistics at the end of the program. Default is
+ to write how many records the program processed and how many URLs
+ were created. Also suppresses some messages during the run.
+
+ -t dict.db
+ For most tasks, if someone need to process bookmarks.db in a
+ regular way (for example, replace all "gopher://gopher." with
+ "http://www."), it is easy to write special program, processing
+ every DB record. For some tasks it is even simpler and faster to
+ write sed/awk scripts. But there are cases when someone need to
+ process bookmarks.db in a non-regular way: one URL must be changed
+ in one way, another URL - in second way, etc. The -t option allows
+ to use an external dictionary for such translation. The dictionary
+ itself is again a FLAD database, where every record has two keys -
+ URL1 and URL2. With the -t option in effect, db2bkmk generates
+ {private,public}.html, renames them to {private,public}.1, and
+ then translates the entire bookmarks.db again, generating
+ {private,public}.2 (totally 4 files), where every URL1 replaced
+ with URL2 from dictionary. (See koi2win.db for example of
+ translation dictionary)
+
+ -r
+ Reverse the effect of -t option - translate from URL2 to URL1.
+
+ BUGS
+ There are three hacks under line marked with "Dirty hacks here":
+ 1. if record["Folder"] == "Private links":
+ This is to hide passwords from my bookmarks file.
+
+ 2. if record["Folder"] == "All the rest - Unclassified":
+ outfile.write(" "*level + "<DT><H3 NEWITEMHEADER ...")
+ First, I compare folder name with fixed string. This is real string
+ from my bookmarks.html. If anyone wants to use the program (s)he
+ should change at least the very strings "Private links" and "All the
+ rest - Unclassified". Second, I use the netscapism "NEWITEMHEADER".
+ Yes, I wrote these programs for Navigator's bookmarks.html, but I
+ still would not like to use too many netscapisms here.
+
+
+------------------------------ check_db ------------------------------
+ NAME
+ check_db.py - script to test generated FLAD database.
+
+ SYNOPSIS
+ check_db.py [-s] [-l logfile.err]
+
+ DESCRIPTION
+ check_db.py reads bookmarks.db and tests for various conditions and
+ possible errors. Current tests are for duplicated URLs and too big
+ indent. "Indent without folder" or "Indent too big" may occur if
+ someone edit bookmarks.db manually, inserting a record with incorrect
+ (higher) level (lower levels indents are ok). Every record tested for
+ correct format (that there are no spare keys, date formats are
+ correct).
+
+ Options:
+ -l logfile.err
+ Put error log into log file (errors are printed to stderr
+ anyway).
+
+ -s
+ Suppress information messages while running (errors are printed
+ anyway).
+
+
+------------------------------ chk_urls -----------------------------
+ NAME
+ chk_urls.py - Internet robot
+
+ SYNOPSIS
+ chk_urls.py [-is]
+
+ DESCRIPTION
+ chk_urls.py runs against bookmarks.db, checking every URL and store
+ results in check.db. check.db is FLAD database almost identical to
+ bookmarks.db, with modified LastVisit/LastModified fields. An additional
+ field, Error, appears in records that could not be checked for some
+ reason; the reason is the content of the Error field.
+ After every 100 URLs chk_urls creates checkpoint file check.dat (in
+ set_checkpoint()). The file is FLAD suitable to pass to
+ fladc.load_file() (in get_checkpoint()). If interrupted by ^C, killed
+ or crashed, chk_urls can be restarted, and checkpoint file helps to
+ restart from interrupted state. Checkpoint stores size and mtime of
+ bookmarks.db (to note if the file changed while chk_urls interrupted)
+ and last checked record. If chk_urls cannot find checkpoint file, or
+ bookmarks.html changed, chk_urls will restart from the beginning. If
+ there is valid checkpoint and size/mtime are ok - chk_urls will start
+ from interrupted record.
+
+ Options:
+ -i
+ Inhibit progress bar. Default is to display progress bar if
+ stderr.isatty()
+
+ -s
+ Suppress output of statistics at the end of the program. Default is
+ to write how many records the program processed and how many URLs
+ were checked. Also suppresses some messages during the run.
+
+ BUGS
+ Ugly mechanism to catch welcome message from FTP server (from urllib).
+
+
+------------------------------ chk_urls -----------------------------
+ NAME
+ check_urls2.py - Internet robot
+
+ SYNOPSIS
+ check_urls2.py [-is]
+
+ DESCRIPTION
+ check_urls2 is just a second version of chk_urls.py. It forks off a child
+ process and the child checks URLs. The parent monitors the child and kills
+ it if there is no answer within 15 minutes.