1 #! /usr/local/bin/python -O
3 For every URL in the FLAD database get info from the Net
4 and store info in check.db
7 Written by BroytMann, Aug 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design
11 import sys, os, stat, string, time
12 from getopt import getopt
14 import urllib, tempfile
15 from copy import _copy_dict
20 import fladm, fladc, shutil
21 from flog import makelog, openlog
24 os.environ["PATH"] = ".:" + os.environ["PATH"]
25 from subproc import Subprocess, RecordFile
def set_checkpoint(rec_no):
    """Write the checkpoint file "check.dat" for record number rec_no.

    The file records the size and modification time of the database file
    (taken from the module-level db_stat, the os.stat() result for the
    database) together with the record number.  The checkpoint reader
    compares the stored Size/MTime with the current db_stat before it
    trusts the stored Record value.
    """
    cpfile = open("check.dat", 'w')
    try:
        cpfile.write("# chk_urls checkpoint file\n")
        cpfile.write("Size: %d\n" % db_stat[stat.ST_SIZE])
        cpfile.write("MTime: %d\n" % db_stat[stat.ST_MTIME])
        # No trailing newline after the last field, as in the original.
        cpfile.write("Record: %d" % rec_no)
    finally:
        # Close explicitly: the checkpoint must be flushed to disk even if a
        # write fails, instead of relying on the file object being collected.
        cpfile.close()
# Fragment of the checkpoint reader (called as get_checkpoint() from start()).
# NOTE(review): this listing omits the enclosing `def`, the `try:` line and the
# bodies of the branches below, so only the visible statements are described.
# Load "check.dat"; the result is indexed by the field names that
# set_checkpoint() writes ("Size", "MTime", "Record").
38 cpfile = fladc.load_file("check.dat")
# The checkpoint is only meaningful if the database file still has the size and
# modification time stored in it; db_stat is the os.stat() result for the database.
# (Branch body not visible; start() reports the value -3 as "bookmarks.db changed",
# presumably returned here - confirm.)
39 if (string.atoi(cpfile["Size"]) <> db_stat[stat.ST_SIZE]) or \
40 (string.atoi(cpfile["MTime"]) <> db_stat[stat.ST_MTIME]):
# Checkpoint matches the database: hand back the stored record number.
43 return string.atoi(cpfile["Record"])
# Handler bodies are not visible in this listing. start() distinguishes a
# "normal start" from -2 ("invalid checkpoint file"); which handler yields
# which value cannot be confirmed from here.
45 except IOError: # No such file
48 except KeyError: # No such key in checkpoint file
51 except string.atoi_error: # Wrong numeric format
# Choose the starting record number, the database file and the log for this
# run from the value returned by get_checkpoint().
# Returns the tuple (start_recno, db_name, log).
# NOTE(review): this listing has lost its indentation and some original lines
# (the first `if`, a few `else:` headers and some assignments), so only the
# visible branches are annotated. The `print` lines are presumably guarded by
# report_stats - confirm against the full file.
56 def start(db_name, report_stats):
57 start_recno = get_checkpoint()
# First visible branch (its condition is not shown): a normal start, logged
# through makelog().
60 log = makelog("check.log")
61 log("chk_urls started")
63 print " chk_urls: normal start"
# -2: the checkpoint file could not be used. Note that this branch calls
# openlog() where the neighbouring branches call makelog().
65 elif start_recno == -2:
66 log = openlog("check.log")
67 log("chk_urls started")
68 log(" invalid checkpoint file, checkpoint ignored")
70 print " chk_urls: invalid checkpoint file, checkpoint ignored"
# -3: the database no longer matches what the checkpoint recorded.
72 elif start_recno == -3:
73 log = makelog("check.log")
74 log("chk_urls started")
75 log(" bookmarks.db changed, checkpoint ignored")
77 print " chk_urls: bookmarks.db changed, checkpoint ignored"
# Any other value reaching this point is reported as an internal error
# (the branch header is not visible).
80 raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)
# 0 is likewise treated as an invalid return value from get_checkpoint().
84 elif start_recno == 0:
85 raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)
87 else: # start_recno > 0
# A usable checkpoint: continue only if check.db exists, and copy it to
# check.old first unless check.old is already there.
88 if os.path.exists("check.db"):
89 if not os.path.exists("check.old"):
90 shutil.copy("check.db", "check.old")
93 log = openlog("check.log")
94 log("chk_urls started")
95 log(" found valid checkpoint file, continue")
97 print " chk_urls: found valid checkpoint file, continue"
# (Branch header not shown.) check.db is missing, so the run restarts with a
# log obtained from makelog().
100 log = makelog("check.log")
101 log("chk_urls started")
102 log(" valid checkpoint, but no check.db file, restarting")
104 print " chk_urls: valid checkpoint, but no check.db file, restarting"
107 return start_recno, db_name, log
# Name of the temporary file passed to the checker subprocess in
# record["TEMPFILE"] (see check_url); the main routine tests for its existence
# at the end of the run. It is a relative name built from the tempfile
# module's filename prefix.
110 tempfname = "check_urls" + tempfile.gettempprefix() + ".tmp"
# (Re)create the checker subprocess running "check_url_sub.py" and the
# RecordFile wrapped around it, rebinding the module-level names check_subp
# and subp_pipe. Also used for the very first start (see the call in the main
# routine, commented "Not restart, just start afresh").
# NOTE(review): some original lines of this function are missing from the
# listing (between the messages and the Subprocess creation); the `print` is
# presumably guarded by report_stats - confirm.
116 def restart_subp(log, report_stats):
117 global check_subp, subp_pipe
119 log(" restarting hanging subprocess")
121 print " chk_urls: restarting hanging subprocess"
125 check_subp = Subprocess("check_url_sub.py")
126 subp_pipe = RecordFile(check_subp)
# Send one record to the checker subprocess and merge its answer back into
# the same record dictionary (the record is modified in place).
# NOTE(review): the `try:`, an `else:` and the return statements are missing
# from this listing; the caller stores the result as `rcode`, but the returned
# values are not visible here.
129 def check_url(record, log, report_stats):
# The temp file name travels with the record only for the duration of the
# request; it is deleted from the record again on both paths below.
131 record["TEMPFILE"] = tempfname
132 subp_pipe.write_record(pickle.dumps(record))
# Answer arrived in time: unpickle it and copy every key over the record.
134 if check_subp.waitForPendingChar(900): # wait 15 minutes
135 rec = pickle.loads(subp_pipe.read_record())
136 del record["TEMPFILE"]
137 for key in rec.keys():
138 record[key] = rec[key]
# (Branch header not shown.) No answer within the timeout: replace the
# subprocess and mark the record with an error text.
140 restart_subp(log, report_stats)
141 del record["TEMPFILE"]
142 record["Error"] = "Subprocess connection timed out"
# Handler body not visible in this listing.
144 except KeyboardInterrupt:
# Main routine of the script: parse options, load the database, check every
# record's URL through the subprocess, and save results and checkpoints.
# NOTE(review): the enclosing `def`, most `if`/`else:`/`try:` headers and
# several assignments (report_stats, record_count, urls_no, url, ...) are
# missing from this listing; comments describe only the visible lines.
# Accepted command-line flags are -i, -s and -e, none of which takes a value.
151 optlist, args = getopt(sys.argv[1:], "ise")
# Default database file; start() may substitute another name.
156 db_name = "bookmarks.db"
# Option handling (loop body not visible).
158 for _opt, _arg in optlist:
171 print "BroytMann check_urls, Copyright (C) 1997-1999 PhiloSoft Design"
175 sys.stderr.write("chk_urls: too many arguments\n")
# A progress bar is considered only when stderr is a terminal.
181 show_pbar = sys.stderr.isatty()
185 from tty_pbar import ttyProgressBar
# Size and mtime of the database; read by set_checkpoint() and by the
# checkpoint reader.
190 db_stat = os.stat(db_name)
# Errors-only mode (its condition is not visible, presumably the -e flag):
# the existing log is reopened instead of going through start().
195 log = openlog("check.log")
196 log("chk_urls restarted for errors")
198 start_recno, db_name, log = start(db_name, report_stats)
201 sys.stdout.write("Loading %s: " % db_name)
204 bookmarks_db = fladm.load_from_file(db_name, fladm.check_record, ["Level"])
# Keep a reference to the complete database: bookmarks_db may be rebound to a
# filtered list below, while store_to_file() is always called on this object.
205 bookmarks_dbstore = bookmarks_db
# Errors-only selection: records that have an "Error" field whose first five
# characters are not "Moved".
208 bookmarks_db = filter(lambda r: r.has_key("Error") and r["Error"][:5] <> "Moved", bookmarks_db)
213 db_len = len(bookmarks_db)
215 print "Database empty"
# A checkpoint pointing at or past the end of the database cannot be resumed.
218 if start_recno >= db_len:
219 _s = "start_recno (%d) >= db_len (%d), restarting" % (start_recno, db_len)
222 print " chk_urls: " + _s
228 s = "Rechecking errors: "
# report_stats is saved here and restored after the loop.
235 save_stats = report_stats
237 pbar = ttyProgressBar(0, db_len)
# Reference time for the periodic save inside the loop.
241 start_time = time.time()
244 restart_subp(log, report_stats) # Not restart, just start afresh
245 checked_dict = {} # Dictionary of checked URLs, mapped to records number
247 for record_no in range(start_recno, db_len):
249 pbar.display(record_no+1)
251 record = bookmarks_db[record_no]
252 record_count = record_count + 1
# `url` is assigned on a line missing from this listing (presumably from
# record["URL"] - confirm).
257 if record.has_key("URL"):
# Duplicate URL: reuse the earlier result by copying that record, but keep
# this record's own "Level" and "Comment".
# NOTE(review): _copy_dict is a private helper of the copy module, and the
# copy is stored into bookmarks_db, not bookmarks_dbstore - when bookmarks_db
# is the filtered list the copy may never be saved; verify.
259 if checked_dict.has_key(url):
260 log("Already checked %s" % url)
261 level = record["Level"]
262 comment = record["Comment"]
263 bookmarks_db[record_no] = _copy_dict(bookmarks_db[checked_dict[url]])
264 bookmarks_db[record_no]["Level"] = level
265 bookmarks_db[record_no]["Comment"] = comment
# (Branch header not shown.) URL not seen before: check it via the subprocess.
267 log("Checking %s" % url)
268 rcode = check_url(record, log, report_stats)
270 current_time = time.time()
271 if current_time - start_time >= 300: # Save checkpoint and database every 5 min
272 bookmarks_dbstore.store_to_file("check.db")
273 set_checkpoint(record_no)
275 start_time = current_time
276 urls_no = urls_no + 1
# Remember where this URL was checked so later duplicates can copy the result.
277 checked_dict[url] = record_no
# (Condition not visible.) The run was stopped from the keyboard.
279 log("Interrupted by user (^C)")
284 report_stats = save_stats
288 print record_count, "records checked"
289 print urls_no, "URLs checked"
# Final save of the complete database.
291 bookmarks_dbstore.store_to_file("check.db")
294 log("chk_urls finished ok")
# Clean-up (the body of this `if` is not visible).
298 if os.path.exists(tempfname):
# Remove the checkpoint file if present; the set_checkpoint() call that
# follows belongs to a branch whose header is not visible (presumably the
# interrupted path - confirm).
302 if os.path.exists("check.dat"):
303 os.unlink("check.dat")
305 set_checkpoint(record_no)
# Script entry guard (its body is missing from this listing).
309 if __name__ == '__main__':