--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ For every URL in the FLAD database get info from the Net
+ and store info in check.db
+
+ Version 2.0
+ Written by BroytMann, Aug 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design
+"""
+
+
+import sys, os, stat, string, time
+from getopt import getopt
+
+import urllib, tempfile
+from copy import _copy_dict
+
+import cPickle
+pickle = cPickle
+
+import fladm, fladc, shutil
+from flog import makelog, openlog
+
+
+# Put the current directory first in PATH before importing subproc.
+# NOTE(review): presumably this lets Subprocess find "check_url_sub.py"
+# in the current directory - confirm against subproc. The ":" separator
+# is Unix-specific.
+os.environ["PATH"] = ".:" + os.environ["PATH"]
+from subproc import Subprocess, RecordFile
+
+
+def set_checkpoint(rec_no):
+ cpfile = open("check.dat", 'w')
+ cpfile.write("# chk_urls checkpoint file\n")
+ cpfile.write("Size: %d\n" % db_stat[stat.ST_SIZE])
+ cpfile.write("MTime: %d\n" % db_stat[stat.ST_MTIME])
+ cpfile.write("Record: %d" % rec_no)
+ cpfile.close()
+
+def get_checkpoint():
+ try:
+ cpfile = fladc.load_file("check.dat")
+ if (string.atoi(cpfile["Size"]) <> db_stat[stat.ST_SIZE]) or \
+ (string.atoi(cpfile["MTime"]) <> db_stat[stat.ST_MTIME]):
+ return -3
+
+ return string.atoi(cpfile["Record"])
+
+ except IOError: # No such file
+ return -1
+
+ except KeyError: # No such key in checkpoint file
+ return -2
+
+ except string.atoi_error: # Wrong numeric format
+ return -2
+
+ return 0
+
+def start(db_name, report_stats):
+ start_recno = get_checkpoint()
+ if start_recno < 0:
+ if start_recno == -1:
+ log = makelog("check.log")
+ log("chk_urls started")
+ if report_stats:
+ print " chk_urls: normal start"
+
+ elif start_recno == -2:
+ log = openlog("check.log")
+ log("chk_urls started")
+ log(" invalid checkpoint file, checkpoint ignored")
+ if report_stats:
+ print " chk_urls: invalid checkpoint file, checkpoint ignored"
+
+ elif start_recno == -3:
+ log = makelog("check.log")
+ log("chk_urls started")
+ log(" bookmarks.db changed, checkpoint ignored")
+ if report_stats:
+ print " chk_urls: bookmarks.db changed, checkpoint ignored"
+
+ else:
+ raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)
+
+ start_recno = 0
+
+ elif start_recno == 0:
+ raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)
+
+ else: # start_recno > 0
+ if os.path.exists("check.db"):
+ if not os.path.exists("check.old"):
+ shutil.copy("check.db", "check.old")
+ db_name = "check.db"
+
+ log = openlog("check.log")
+ log("chk_urls started")
+ log(" found valid checkpoint file, continue")
+ if report_stats:
+ print " chk_urls: found valid checkpoint file, continue"
+
+ else:
+ log = makelog("check.log")
+ log("chk_urls started")
+ log(" valid checkpoint, but no check.db file, restarting")
+ if report_stats:
+ print " chk_urls: valid checkpoint, but no check.db file, restarting"
+ start_recno = 0
+
+ return start_recno, db_name, log
+
+
+# Name of the temporary file passed to the subprocess in every record
+# (see check_url) and removed at the end of run().
+# NOTE(review): uniqueness depends on what tempfile.gettempprefix()
+# returns on the running Python version - confirm.
+tempfname = "check_urls" + tempfile.gettempprefix() + ".tmp"
+
+
+# The checking subprocess and the record pipe to it; both are created
+# (and recreated after a timeout) by restart_subp()
+check_subp = None
+subp_pipe = None
+
+def restart_subp(log, report_stats):
+ global check_subp, subp_pipe
+ if check_subp:
+ log(" restarting hanging subprocess")
+ if report_stats:
+ print " chk_urls: restarting hanging subprocess"
+ del check_subp
+ del subp_pipe
+
+ check_subp = Subprocess("check_url_sub.py")
+ subp_pipe = RecordFile(check_subp)
+
+
+def check_url(record, log, report_stats):
+ try:
+ record["TEMPFILE"] = tempfname
+ subp_pipe.write_record(pickle.dumps(record))
+
+ if check_subp.waitForPendingChar(900): # wait 15 minutes
+ rec = pickle.loads(subp_pipe.read_record())
+ del record["TEMPFILE"]
+ for key in rec.keys():
+ record[key] = rec[key]
+ else:
+ restart_subp(log, report_stats)
+ del record["TEMPFILE"]
+ record["Error"] = "Subprocess connection timed out"
+
+ except KeyboardInterrupt:
+ return 0
+
+ return 1
+
+
+def run():
+ """
+ Command-line entry point: check the URLs of the database and store
+ the result in check.db.
+
+ Options:
+ -i do not show the progress bar
+ -s do not print the banner and progress messages
+ -e recheck only the records of check.db that have an "Error" field
+ not starting with "Moved"
+ One optional argument - the database file name (default bookmarks.db).
+
+ Calls sys.exit(1) on too many arguments or when interrupted by the
+ user, sys.exit(0) if there is nothing to check.
+ """
+ optlist, args = getopt(sys.argv[1:], "ise")
+
+ show_pbar = 1
+ report_stats = 1
+ only_errors = 0
+ db_name = "bookmarks.db"
+
+ for _opt, _arg in optlist:
+ if _opt == '-i':
+ show_pbar = 0
+ if _opt == '-s':
+ report_stats = 0
+ if _opt == '-e':
+ only_errors = 1
+ # The loop variables do not exist if optlist was empty
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
+
+ if report_stats:
+ print "BroytMann check_urls, Copyright (C) 1997-1999 PhiloSoft Design"
+
+ if args:
+ if len(args) > 1:
+ sys.stderr.write("chk_urls: too many arguments\n")
+ sys.exit(1)
+ else:
+ db_name = args[0]
+
+ # The progress bar is shown only if stderr is a terminal...
+ if show_pbar:
+ show_pbar = sys.stderr.isatty()
+
+ # ...and only if the tty_pbar module can be imported
+ if show_pbar:
+ try:
+ from tty_pbar import ttyProgressBar
+ except ImportError:
+ show_pbar = 0
+
+ # db_stat is read by set_checkpoint() and get_checkpoint()
+ global db_stat, log
+ db_stat = os.stat(db_name)
+
+ if only_errors:
+ # Recheck mode: no checkpoint lookup, work on the stored results
+ start_recno = 0
+ db_name = "check.db"
+ log = openlog("check.log")
+ log("chk_urls restarted for errors")
+ else:
+ start_recno, db_name, log = start(db_name, report_stats)
+
+ if report_stats:
+ sys.stdout.write("Loading %s: " % db_name)
+ sys.stdout.flush()
+
+ bookmarks_db = fladm.load_from_file(db_name, fladm.check_record, ["Level"])
+ # Keep a reference to the whole database: below bookmarks_db may be
+ # replaced with a filtered list, but the whole database is what is stored
+ bookmarks_dbstore = bookmarks_db
+
+ if only_errors:
+ bookmarks_db = filter(lambda r: r.has_key("Error") and r["Error"][:5] <> "Moved", bookmarks_db)
+
+ if report_stats:
+ print "Ok"
+
+ db_len = len(bookmarks_db)
+ if db_len == 0:
+ print "Database empty"
+ sys.exit(0)
+
+ # A checkpoint beyond the end of the database cannot be continued
+ if start_recno >= db_len:
+ _s = "start_recno (%d) >= db_len (%d), restarting" % (start_recno, db_len)
+ log(" " + _s)
+ if report_stats:
+ print " chk_urls: " + _s
+ del _s
+ start_recno = 0
+
+ if report_stats:
+ if only_errors:
+ s = "Rechecking errors: "
+ else:
+ s = "Checking: "
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+ # While the progress bar is shown report messages are switched off;
+ # the original setting is restored after the loop
+ if show_pbar:
+ save_stats = report_stats
+ report_stats = 0
+ pbar = ttyProgressBar(0, db_len)
+
+ urls_no = 0
+ record_count = 0
+ start_time = time.time()
+
+ # rcode stays 1 unless check_url() reports an interrupt
+ rcode = 1
+ restart_subp(log, report_stats) # Not restart, just start afresh
+ checked_dict = {} # Dictionary of checked URLs, mapped to records number
+
+ for record_no in range(start_recno, db_len):
+ if show_pbar:
+ pbar.display(record_no+1)
+
+ record = bookmarks_db[record_no]
+ record_count = record_count + 1
+
+ # In recheck mode every record has "Error" (see filter above);
+ # remove the old value before checking again
+ if only_errors:
+ del record["Error"]
+
+ if record.has_key("URL"):
+ url = record["URL"]
+ if checked_dict.has_key(url):
+ # The same URL was checked earlier in this run: reuse that
+ # record's data, keeping this record's own Level and Comment
+ log("Already checked %s" % url)
+ level = record["Level"]
+ # NOTE(review): assumes every record has "Comment" - verify
+ # against fladm.check_record
+ comment = record["Comment"]
+ # NOTE(review): with -e bookmarks_db is the list built by
+ # filter(), so this assignment replaces the item in that list
+ # only; it looks like the copy never reaches bookmarks_dbstore
+ # and is not stored - confirm
+ bookmarks_db[record_no] = _copy_dict(bookmarks_db[checked_dict[url]])
+ bookmarks_db[record_no]["Level"] = level
+ bookmarks_db[record_no]["Comment"] = comment
+ else:
+ log("Checking %s" % url)
+ rcode = check_url(record, log, report_stats)
+ if rcode:
+ current_time = time.time()
+ if current_time - start_time >= 300: # Save checkpoint and database every 5 min
+ bookmarks_dbstore.store_to_file("check.db")
+ set_checkpoint(record_no)
+ log.flush()
+ start_time = current_time
+ urls_no = urls_no + 1
+ checked_dict[url] = record_no
+ else:
+ log("Interrupted by user (^C)")
+ break
+
+ if show_pbar:
+ del pbar
+ report_stats = save_stats
+
+ if report_stats:
+ print "Ok"
+ print record_count, "records checked"
+ print urls_no, "URLs checked"
+
+ # The database is stored whether the run was completed or interrupted
+ bookmarks_dbstore.store_to_file("check.db")
+
+ if rcode:
+ log("chk_urls finished ok")
+ log.close()
+
+ urllib.urlcleanup()
+ if os.path.exists(tempfname):
+ os.unlink(tempfname)
+
+ if rcode:
+ # Completed: the checkpoint is not needed anymore
+ if os.path.exists("check.dat"):
+ os.unlink("check.dat")
+ else:
+ # Interrupted: remember the record that was being checked
+ set_checkpoint(record_no)
+ sys.exit(1)
+
+
+# Run the checker only when the file is executed as a script
+if __name__ == '__main__':
+ run()