2 """Robot interface - check URLs from bookmarks database
4 This file is part of the Bookmarks database and Internet robot.
# Module metadata. The version, revision and date values are sliced out of
# version-control keyword strings ("$Revision$", "$Id$", "$Date$").
# NOTE(review): with the keywords unexpanded, as they appear here, each of
# the three slices evaluates to an empty string.
7 __version__ = "$Revision$"[11:-2]
8 __revision__ = "$Id$"[5:-2]
9 __date__ = "$Date$"[7:-2]
10 __author__ = "Oleg Broytman <phd@phdru.name>"
11 __copyright__ = "Copyright (C) 2000-2011 PhiloSoft Design"
12 __license__ = "GNU GPL"
# Command-line handling. The "ise" spec passed to getopt declares three
# flag options (-i, -s, -e), none of which takes a value.
# NOTE(review): `sys` is used below but its import is not visible in this
# excerpt.
18 from getopt import getopt
19 optlist, args = getopt(sys.argv[1:], "ise")
# Walk the parsed (option, value) pairs. The loop body, i.e. what each flag
# actually switches on, is not visible in this excerpt.
25 for _opt, _arg in optlist:
# Banner written to stdout with a Python 2 print statement.
# NOTE(review): the copyright years here (1997-2010) differ from the
# module-level __copyright__ string (2000-2011); the condition under which
# the banner is printed is not visible.
38 print "Broytman check_urls, Copyright (C) 1997-2010 PhiloSoft Design"
# Usage error written to stderr. Presumably emitted when positional
# arguments remain in `args` -- the guarding test is not visible; confirm.
41 sys.stderr.write("check_urls: too many arguments\n")
42 sys.stderr.write("Usage: check_urls [-ise]\n")
# show_pbar records whether stderr is attached to a terminal.
46 show_pbar = sys.stderr.isatty()
# Progress-bar class from the m_lib package.
# NOTE(review): presumably imported only when show_pbar is true -- the
# guarding condition is not visible in this excerpt.
50 from m_lib.pbar.tty_pbar import ttyProgressBar
# Logging helpers from m_lib.flog; both start-up variants below use the
# file name "check.log" and bind the returned callable to `log`.
54 from m_lib.flog import makelog, openlog
# Variant 1: "restarted for errors" start-up, using openlog().
# NOTE(review): the condition selecting between the two variants is not
# visible in this excerpt. Presumably openlog() continues an existing log
# while makelog() starts a new one -- confirm against m_lib.flog.
56 log = openlog("check.log")
57 log("chk_urls restarted for errors")
59 print "chk_urls restarted for errors"
# Variant 2: normal start-up, using makelog().
61 log = makelog("check.log")
62 log("check_urls started")
64 print " check_urls: normal start"
# Project modules: `storage` loads and saves the bookmarks tree, `robot`
# performs the actual URL check (see robot.check_url() in the main loop).
66 from storage import storage
69 from robots import robot
# Announce which file is being loaded. The message has no trailing newline,
# so subsequent output continues on the same line.
73 sys.stdout.write("Loading %s: " % storage.filename)
# Load the bookmarks tree, then flatten it: after make_linear() the code
# reads root_folder.linear as an indexable sequence of objects.
76 root_folder = storage.load()
77 from bkmk_objects import make_linear, break_tree
78 make_linear(root_folder)
# Number of objects in the flattened list; used as the main loop bound and
# passed to the progress bar.
79 objects = len(root_folder.linear)
# Message for the error-recheck mode; how `s` is used afterwards is not
# visible in this excerpt.
86 s = "Rechecking errors: "
# Progress bar created with the arguments 0 and `objects`.
# NOTE(review): presumably the minimum and maximum of its range -- confirm
# against m_lib.pbar.tty_pbar.
93 pbar = ttyProgressBar(0, objects)
# Main pass: visit every object of the flattened list by index.
# NOTE(review): the counters (object_count, urls_no, size) and the `checked`
# dict are initialised on lines that are not visible in this excerpt, as are
# the `try:` / `else:` lines that structure the loop body.
102 for object_no in range(objects):
# Advance the progress bar to the 1-based position of the current object.
104 pbar.display(object_no+1)
# NOTE: the local name `object` shadows the builtin of the same name.
106 object = root_folder.linear[object_no]
107 object_count = object_count + 1
# Only objects flagged as bookmarks are examined further.
109 if object.isBookmark:
110 if object.href.startswith('place:'): # Firefox SmartBookmarks
111 log("Skipped %s" % object.href)
# Remove an "error" attribute already present on the object.
# NOTE(review): presumably a result left over from an earlier run, cleared
# before re-testing -- the surrounding condition is not visible.
115 if hasattr(object, "error"):
116 delattr(object, "error")
# `checked` maps an href to the index (in root_folder.linear) of an object
# already tested with that URL. has_key() is the Python 2 membership test.
120 if checked.has_key(object.href):
121 log("Already checked %s" % object.href)
# Duplicate URL: copy whichever result attributes the earlier object has
# onto this one instead of testing the URL again.
122 old_object = root_folder.linear[checked[object.href]]
123 for attr_name in ("last_visit", "last_modified",
124 "error", "no_error", "moved", "size", "md5", "real_title",
125 "last_tested", "test_time", "icon", "charset"):
126 if hasattr(old_object, attr_name):
127 setattr(object, attr_name, getattr(old_object, attr_name))
# URL not tested yet (presumably the else branch; the `else:` line is not
# visible): hand the object to the robot. How `rcode` is used afterwards is
# not visible in this excerpt.
129 log("Checking %s" % object.href)
130 rcode = robot.check_url(object)
# Remember this object's index for later duplicates and count the URL.
133 checked[object.href] = object_no
134 urls_no = urls_no + 1
# Add the object's size to the running total. The `try:` matching the
# `except` below is not visible in this excerpt.
136 size = size + int(object.size)
137 except (AttributeError, TypeError, ValueError):
138 pass # Some object does not have a size :(
# Log entry for a user interrupt.
# NOTE(review): presumably inside a KeyboardInterrupt handler that is not
# visible here -- confirm.
140 log("Interrupted by user (^C)")
# Summary of the run written to stdout with Python 2 print statements.
# NOTE(review): any condition guarding the summary is not visible in this
# excerpt.
149 print object_count, "objects passed"
150 print urls_no, "URLs checked"
151 print size, "bytes eaten"
# Pass the flattened list to break_tree(), then hand the root folder back to
# the storage object for saving.
# NOTE(review): break_tree() presumably undoes the links that make_linear()
# created -- confirm in bkmk_objects.
153 break_tree(root_folder.linear)
154 storage.store(root_folder)
157 log("check_urls finished ok")
# Script entry guard; the statement it guards is not visible in this excerpt.
161 if __name__ == '__main__':