]> git.phdru.name Git - bookmarks_db.git/blob - check_urls.py
Added docstrings, __{version,revision,etc}__ boilerplates.
[bookmarks_db.git] / check_urls.py
1 #! /usr/bin/env python
2 """Robot interface - check URLs from bookmarks database
3
4 This file is a part of Bookmarks database and Internet robot.
5 """
6
# Module metadata.  The first three values are sliced out of RCS-style
# "$Keyword$" markers: each slice drops the "$Keyword: " prefix and the
# trailing " $" of an expanded marker.  While a marker is unexpanded (as
# written here) the slice yields an empty string.
__version__ = "$Revision$"[11:-2]
__revision__ = "$Id$"[5:-2]
__date__ = "$Date$"[7:-2]
__author__ = "Oleg Broytman <phd@phdru.name>"
__copyright__ = "Copyright (C) 2000-2011 PhiloSoft Design"
__license__ = "GNU GPL"
13
14 import sys, os
15
16
def run():
   """Check the URLs of all bookmarks in the bookmarks database.

   Command-line options (taken from ``sys.argv``):

     -i  do not show the tty progress bar
     -s  do not print the banner, progress messages and statistics
     -e  recheck only the bookmarks that carry an ``error`` attribute;
         the existing "check.log" is opened with ``openlog`` instead of
         being created with ``makelog``

   The function loads the bookmarks tree from the storage, linearizes it,
   passes every bookmark to the robot, and stores the tree back.  Each
   distinct ``href`` is checked once; a bookmark whose ``href`` has been
   checked already gets the result attributes copied from the first one.

   Exits with status 1 (after printing a usage message to stderr) when an
   unknown option or any positional argument is given.
   """
   from getopt import getopt, GetoptError

   try:
      optlist, args = getopt(sys.argv[1:], "ise")
   except GetoptError:
      # sys.exc_info() instead of "except ... as" keeps the code valid
      # for both old Python 2 and Python 3 parsers.
      sys.stderr.write("check_urls: %s\n" % sys.exc_info()[1])
      sys.stderr.write("Usage: check_urls [-ise]\n")
      sys.exit(1)

   show_pbar = 1
   report_stats = 1
   only_errors = 0

   for opt, _value in optlist:
      if opt == '-i':
         show_pbar = 0
      elif opt == '-s':
         report_stats = 0
      elif opt == '-e':
         only_errors = 1

   if report_stats:
      sys.stdout.write(
         "Broytman check_urls, Copyright (C) 1997-2010 PhiloSoft Design\n")

   if args:
      sys.stderr.write("check_urls: too many arguments\n")
      sys.stderr.write("Usage: check_urls [-ise]\n")
      sys.exit(1)

   # The progress bar is shown only when stderr is a terminal and the
   # progress bar module can be imported.
   if show_pbar:
      show_pbar = sys.stderr.isatty()

   if show_pbar:
      try:
         from m_lib.pbar.tty_pbar import ttyProgressBar
      except ImportError:
         show_pbar = 0

   from m_lib.flog import makelog, openlog
   if only_errors:
      log = openlog("check.log")
      log("chk_urls restarted for errors")
      if report_stats:
         sys.stdout.write("chk_urls restarted for errors\n")
   else:
      log = makelog("check.log")
      log("check_urls started")
      if report_stats:
         sys.stdout.write("   check_urls: normal start\n")

   from storage import storage
   bkmk_storage = storage()

   from robots import robot
   url_robot = robot(log)

   if report_stats:
      sys.stdout.write("Loading %s: " % bkmk_storage.filename)
      sys.stdout.flush()

   root_folder = bkmk_storage.load()
   from bkmk_objects import make_linear, break_tree
   make_linear(root_folder)
   objects = len(root_folder.linear)

   if report_stats:
      sys.stdout.write("Ok\n")
      if only_errors:
         sys.stdout.write("Rechecking errors: ")
      else:
         sys.stdout.write("Checking: ")
      sys.stdout.flush()

   if show_pbar:
      pbar = ttyProgressBar(0, objects)

   urls_no = 0       # number of URLs actually passed to the robot
   object_count = 0  # number of objects visited, bookmarks or not
   size = 0          # sum of the integer "size" attributes of checked URLs

   checked = {}      # href -> index (in root_folder.linear) of first check
   rcode = 1         # result of the last robot.check_url(); false = ^C

   for object_no, bookmark in enumerate(root_folder.linear):
      if show_pbar:
         pbar.display(object_no+1)

      object_count = object_count + 1

      if not bookmark.isBookmark:
         continue

      if bookmark.href.startswith('place:'): # Firefox SmartBookmarks
         log("Skipped %s" % bookmark.href)
         continue

      if only_errors:
         # Recheck only the bookmarks that failed before; the old error
         # is dropped before the check.
         if hasattr(bookmark, "error"):
            delattr(bookmark, "error")
         else:
            continue

      if bookmark.href in checked:
         log("Already checked %s" % bookmark.href)
         old_bookmark = root_folder.linear[checked[bookmark.href]]
         for attr_name in ("last_visit", "last_modified",
               "error", "no_error", "moved", "size", "md5", "real_title",
               "last_tested", "test_time", "icon", "charset"):
            if hasattr(old_bookmark, attr_name):
               setattr(bookmark, attr_name, getattr(old_bookmark, attr_name))
         continue

      log("Checking %s" % bookmark.href)
      rcode = url_robot.check_url(bookmark)

      if not rcode:
         log("Interrupted by user (^C)")
         break

      # href is read again after the check on purpose: the key is
      # whatever the bookmark holds once the robot is done with it.
      checked[bookmark.href] = object_no
      urls_no = urls_no + 1
      try:
         size = size + int(bookmark.size)
      except (AttributeError, TypeError, ValueError):
         pass # Some object does not have a size :(

   url_robot.stop()

   if show_pbar:
      del pbar

   if report_stats:
      sys.stdout.write("Ok\n")
      sys.stdout.write("%s objects passed\n" % object_count)
      sys.stdout.write("%s URLs checked\n" % urls_no)
      sys.stdout.write("%s bytes eaten\n" % size)

   # The tree is stored even after an interrupt, so that the results
   # collected so far are written back.
   break_tree(root_folder.linear)
   bkmk_storage.store(root_folder)

   if rcode:
      log("check_urls finished ok")
   log.close()
160
# Start the check only when the file is executed as a script.
if __name__ == "__main__":
   run()