1 #! /usr/local/bin/python -O
3 For every URL in the FLAD database get info from the Net
4 and store info in check.db
6 Written by BroytMann, Aug-Oct 1997. Copyright (C) 1997 PhiloSoft Design
9 import sys, os, string, stat, shutil, time
10 from getopt import getopt
14 from urllib import URLopener, splittype
16 from md5wrapper import md5wrapper
17 from flog import makelog, openlog
18 import fladm, fladc, www_util
21 # Shortcut for basic usage
# Creates a URLopener, keeps it in `_urlopener`, and returns whatever that
# opener's open() call yields for `url`.
# NOTE(review): the enclosing `def` line and the statements around the
# assignment are not visible in this excerpt -- confirm whether the opener
# is meant to be created once and then reused.
27 _urlopener = URLopener()
28 return _urlopener.open(url)
# Download `url` through the `_urlopener` URLopener and return the result of
# its retrieve() call (check_url unpacks that result as `fname, headers`).
#   url      - the URL to fetch
#   filename - optional name of the file to pass on to retrieve()
# NOTE(review): the lines that choose between the two `return` statements
# are not visible in this excerpt; presumably the first one is used when
# `filename` is given and the second one otherwise -- TODO confirm.
30 def urlretrieve(url, filename=None):
33 _urlopener = URLopener()
# Variant that passes the file name on.
35 return _urlopener.retrieve(url, filename)
# Variant that passes no file name.
37 return _urlopener.retrieve(url)
def myftpwrapper(user, passwd, host, port, dirs):
    """Create an FTP wrapper and remember the cache key of the connection.

    Drop-in replacement for urllib.ftpwrapper: it records the tuple
    (user, host, port, "dir1/dir2/...") in the module-level `_key` and then
    delegates to the saved original, `_ftpwrapper`, with the same arguments.
    The stored key is what other code uses to index `_urlopener.ftpcache`.

    user, passwd -- FTP login credentials
    host, port   -- FTP server address
    dirs         -- sequence of directory names leading to the file

    Returns whatever `_ftpwrapper` returns.
    """
    # Without this declaration the assignment below would only bind a local
    # name and the module-level `_key` would never be updated.
    global _key
    _key = (user, host, port, string.joinfields(dirs, '/'))
    return _ftpwrapper(user, passwd, host, port, dirs)
# Keep a reference to urllib's original ftpwrapper under `_ftpwrapper`, then
# install myftpwrapper in its place so that lookups of urllib.ftpwrapper
# get myftpwrapper, which in turn calls the saved original.
51 _ftpwrapper = urllib.ftpwrapper
52 urllib.ftpwrapper = myftpwrapper
# Read the FTP server's welcome text from the connection stored in
# `_urlopener.ftpcache` under `_key`, then reset `_key` to None.
# NOTE(review): the `def` line and the statement that hands `_welcome` back
# are not visible in this excerpt; check_url calls get_welcome() and feeds
# its result to md5.update(), so presumably `_welcome` is returned.
56 _welcome = _urlopener.ftpcache[_key].ftp.welcome
57 _key = None # I am assuming there are no duplicate ftp URLs in db. If there are - _key in prev line is invalid
def set_checkpoint(rec_no):
    """Write the checkpoint file "check.dat" for record number `rec_no`.

    The file holds a comment line, the size and modification time of the
    database file (taken from the module-level stat tuple `db_stat`) and
    the record number, one "Name: value" pair per line.

    rec_no -- integer record number to store

    Raises IOError if the file cannot be created or written.
    """
    cpfile = open("check.dat", 'w')
    try:
        cpfile.write("# chk_urls checkpoint file\n")
        cpfile.write("Size: %d\n" % db_stat[stat.ST_SIZE])
        cpfile.write("MTime: %d\n" % db_stat[stat.ST_MTIME])
        # The last line is deliberately written without a trailing newline,
        # exactly as before.
        cpfile.write("Record: %d" % rec_no)
    finally:
        # Close explicitly so the checkpoint is flushed to disk right away
        # and the handle is released even if a write fails.
        cpfile.close()
# Load the checkpoint stored in "check.dat" and validate it against the
# stat data of the database file held in `db_stat`.
# NOTE(review): the `def` line, the `try:` and the bodies of the branches
# below are not visible in this excerpt. This is presumably the body of
# get_checkpoint(); its caller treats -1, -2 and -3 as failure codes and a
# positive value as the record number to resume from. Which branch yields
# which code should be confirmed against the full file.
71 cpfile = fladc.load_file("check.dat")
# Compare the size and mtime saved in the checkpoint with the current ones.
72 if (string.atoi(cpfile["Size"]) <> db_stat[stat.ST_SIZE]) or \
73 (string.atoi(cpfile["MTime"]) <> db_stat[stat.ST_MTIME]):
# The saved record number, converted to an integer.
76 return string.atoi(cpfile["Record"])
78 except IOError: # No such file
81 except KeyError: # No such key in checkpoint file
84 except string.atoi_error: # Wrong numeric format
# Name of the scratch file that check_url downloads each URL into: the
# tempfile module's temporary-name prefix followed by "check.tmp".
90 tempfname = tempfile.gettempprefix() + "check.tmp"
# NOTE(review): fragment only -- the `def` line and most of the body are not
# visible in this excerpt. Presumably this belongs to get_error(msg), which
# check_url uses to fill record["Error"]; confirm against the full file.
# Case where the message is a plain string.
94 if type(msg) == type(""):
# Turn item `i` into text and replace every newline with the two characters
# backslash + "n", so the value contains no line breaks.
102 x = string.join(string.split(str(i), "\n"), "\\n")
# Download one URL into the scratch file `tempfname` and store what was
# learned about it in `record`.
#   record   - mapping for the bookmark being checked; updated in place
#   url_type - scheme part of the URL (compared against "ftp" below)
#   url_rest - the part of the URL that follows "<scheme>:"
# Keys written by the visible code: "Size", "LastModified", "MD5", "Error"
# and "LastTested".
# NOTE(review): the try/except/if/else lines and the return statement are
# not visible in this excerpt, so the exact control flow between the
# statements below, and the value the caller receives as `rcode`, must be
# confirmed against the full file.
106 def check_url(record, url_type, url_rest):
# Current time in whole seconds, as a string.
108 now = str(int(time.time()))
# Fetch the URL into the scratch file; `headers` holds the response headers.
111 fname, headers = urlretrieve(url_type + ':' + url_rest, tempfname)
# Size of the downloaded file on disk.
115 record["Size"] = str(os.stat(tempfname)[stat.ST_SIZE])
# Take the modification time from the "Last-Modified" header, parse it with
# www_util.parse_time and store it as a string of whole seconds.
119 last_modified = headers["Last-Modified"]
124 last_modified = www_util.parse_time(last_modified)
127 last_modified = str(int(last_modified))
# Alternative source for the value: the record's own "LastVisit" field
# (presumably used when no usable header is available -- TODO confirm).
129 last_modified = record["LastVisit"]
131 record["LastModified"] = last_modified
134 if url_type == "ftp": # Pass welcome message through MD5
135 md5.update(get_welcome())
# Digest the downloaded file and store the digest's string form.
137 md5.md5file(tempfname)
138 record["MD5"] = str(md5)
# Failure paths: store a description of the error in the record.
141 record["Error"] = get_error(msg)
144 record["Error"] = "Unexpected EOF (FTP server closed connection)"
146 except KeyboardInterrupt:
149 # Mark this even in case of error
150 record["LastTested"] = now
# Command-line handling: getopt accepts only the flags -i and -s.
# NOTE(review): the enclosing `def` line and the bodies of the option loop
# and argument checks are not visible in this excerpt, so what each flag
# does cannot be stated from here.
156 optlist, args = getopt(sys.argv[1:], "is")
# Default database file name.
160 db_name = "bookmarks.db"
162 for _opt, _arg in optlist:
# Program banner.
173 print "BroytMann chk_urls, Copyright (C) 1997-1998 PhiloSoft Design"
# Complaint written to stderr when too many arguments were supplied.
176 sys.stderr.write("chk_urls: too many arguments\n")
# The progress-bar flag starts out as "stderr is a terminal".
# NOTE(review): the condition guarding the import below is not visible in
# this excerpt; presumably tty_pbar is only imported when show_pbar is true.
180 show_pbar = sys.stderr.isatty()
184 from tty_pbar import ttyProgressBar
# Decide where to start: stat the database, read the checkpoint, open the
# log file and report which kind of start this is.
# NOTE(review): several structural lines (an outer `if`, two `else:` lines,
# assignments to start_recno) are not visible in this excerpt.
# NOTE(review): the file name is spelled out here although `db_name` holds
# the same default -- confirm they cannot diverge.
189 db_stat = os.stat("bookmarks.db")
191 start_recno = get_checkpoint()
# -1: reported as a normal start.
193 if start_recno == -1:
194 log = makelog("check.log")
195 log("chk_urls started")
197 print " chk_urls: normal start"
# -2: the checkpoint file is invalid and is ignored.
199 elif start_recno == -2:
200 log = openlog("check.log")
201 log("chk_urls started")
202 log(" invalid checkpoint file, checkpoint ignored")
204 print " chk_urls: invalid checkpoint file, checkpoint ignored"
# -3: the database changed after the checkpoint was written; ignored too.
206 elif start_recno == -3:
207 log = makelog("check.log")
208 log("chk_urls started")
209 log(" bookmarks.db changed, checkpoint ignored")
211 print " chk_urls: bookmarks.db changed, checkpoint ignored"
# Any other code reaching this point is a programming error.
214 raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)
# 0 is never a valid checkpoint result either.
218 elif start_recno == 0:
219 raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)
221 else: # start_recno > 0
# Resuming needs the partial results in check.db; a copy is saved as
# check.old unless one already exists.
222 if os.path.exists("check.db"):
223 if not os.path.exists("check.old"):
224 shutil.copy("check.db", "check.old")
227 log = openlog("check.log")
228 log("chk_urls started")
229 log(" found valid checkpoint file, continue")
231 print " chk_urls: found valid checkpoint file, continue"
# Valid checkpoint but no check.db to continue from: restart.
234 log = makelog("check.log")
235 log("chk_urls started")
236 log(" valid checkpoint, but no check.db file, restarting")
238 print " chk_urls: valid checkpoint, but no check.db file, restarting"
# Load the database named by `db_name` and remember how many records it has.
242 sys.stdout.write("Loading %s: " % db_name)
245 bookmarks_db = fladm.load_from_file(db_name, fladm.check_record, ["Level"])
246 db_len = len(bookmarks_db)
# A checkpoint pointing at or past the end of the database cannot be
# resumed; the message says the run restarts.
# NOTE(review): the statement that resets start_recno is not visible here.
251 if start_recno >= db_len:
252 _s = "start_recno (%d) >= db_len (%d), restarting" % (start_recno, db_len)
255 print " chk_urls: " + _s
# Main loop: visit every record from start_recno to the end of the database
# and check those that carry a "URL" field.
# NOTE(review): the initialisation of record_count / urls_no and the
# try/except around the loop are not visible in this excerpt.
260 sys.stdout.write("Checking: ")
264 pbar = ttyProgressBar(0, db_len)
# Reference point for the periodic checkpoint below.
268 start_time = time.time()
271 for record_no in range(start_recno, db_len):
273 pbar.display(record_no+1)
275 record = bookmarks_db[record_no]
276 record_count = record_count + 1
278 if record.has_key("URL"):
# Split the URL into its scheme and the rest, log it, then check it.
279 url_type, url_rest = splittype(record["URL"])
280 log("Checking %s:%s" % (url_type, url_rest))
281 rcode = check_url(record, url_type, url_rest)
283 current_time = time.time()
284 if current_time - start_time >= 300: # Save checkpoint and database every 5 min
285 bookmarks_db.store_to_file("check.db")
286 set_checkpoint(record_no)
# Start the next 300-second interval from now.
288 start_time = current_time
289 urls_no = urls_no + 1
291 log("Interrupted by user (^C)")
# Wrap-up: print the totals, save the results and tidy up the work files.
# NOTE(review): the conditions that choose between the "finished ok" path
# and the final set_checkpoint() call, and the statement executed when the
# scratch file exists, are not visible in this excerpt.
299 print record_count, "records checked"
300 print urls_no, "URLs checked"
# Save the results to check.db.
302 bookmarks_db.store_to_file("check.db")
305 log("chk_urls finished ok")
309 if os.path.exists(tempfname):
# Remove the checkpoint file if it exists.
313 if os.path.exists("check.dat"):
314 os.unlink("check.dat")
# Record the current position in the checkpoint file.
316 set_checkpoint(record_no)
320 if __name__ == '__main__':