]> git.phdru.name Git - bookmarks_db.git/blob - check_urls.py
Copy icon and charset from the first of the duplicate bookmarks.
[bookmarks_db.git] / check_urls.py
1 #! /usr/bin/env python
2 """
3    Robot interface - check URLs from bookmarks database
4
5    Written by BroytMann. Copyright (C) 2000-2007 PhiloSoft Design.
6 """
7
8
9 import sys, os, tempfile
# Name of the scratch file handed to the robot in run() and removed when
# run() finishes.  tempfile.gettempprefix() supplies only a file-name prefix,
# so the resulting name has no directory component.
tempfname = "check_urls%stmp" % tempfile.gettempprefix()
11
12
# Attributes copied from an already checked bookmark onto later bookmarks
# that share the same href.
_CHECK_RESULT_ATTRS = (
   "last_visit", "last_modified",
   "error", "no_error", "moved", "size", "md5", "real_title",
   "last_tested", "test_time", "icon", "charset",
)


def _parse_options(argv):
   """Parse the command line; return (show_pbar, report_stats, only_errors, args).

   Recognized options: -i turns the progress bar off, -s turns statistics
   and banner output off, -e restricts the run to bookmarks that carry an
   "error" attribute.  An unrecognized option is reported on stderr
   together with the usage line and terminates the process with status 1.
   """
   from getopt import getopt, GetoptError

   try:
      optlist, args = getopt(argv, "ise")
   except GetoptError:
      # sys.exc_info() instead of "except ... as" keeps the syntax valid
      # on every Python 2 release as well as on Python 3.
      sys.stderr.write("check_urls: %s\n" % sys.exc_info()[1])
      sys.stderr.write("Usage: check_urls [-ise]\n")
      sys.exit(1)

   show_pbar = 1
   report_stats = 1
   only_errors = 0

   for opt, _value in optlist:
      if opt == '-i':
         show_pbar = 0
      elif opt == '-s':
         report_stats = 0
      elif opt == '-e':
         only_errors = 1

   return show_pbar, report_stats, only_errors, args


def _copy_check_results(source, target):
   """Copy every check-result attribute present on source onto target."""
   for attr_name in _CHECK_RESULT_ATTRS:
      if hasattr(source, attr_name):
         setattr(target, attr_name, getattr(source, attr_name))


def run():
   """Check the URLs of all bookmarks in the bookmarks database.

   Reads options from sys.argv (see _parse_options), loads the bookmarks
   through the storage module, passes every bookmark to the robot's
   check_url() and stores the tree back.  A bookmark whose href was
   already checked during this run is not fetched again; the results are
   copied from the first bookmark with that href.  With -e only bookmarks
   that have an "error" attribute are rechecked, and that attribute is
   removed before the check.

   Positional arguments or unknown options end the process with exit
   status 1.  The robot is stopped, the log is closed and the temporary
   file is removed even when the run fails with an exception.
   """
   show_pbar, report_stats, only_errors, args = _parse_options(sys.argv[1:])

   if report_stats:
      print("BroytMann check_urls, Copyright (C) 1997-2007 PhiloSoft Design")

   if args:
      sys.stderr.write("check_urls: too many arguments\n")
      sys.stderr.write("Usage: check_urls [-ise]\n")
      sys.exit(1)

   # The progress bar is drawn on stderr, so it needs a terminal there.
   if show_pbar:
      show_pbar = sys.stderr.isatty()

   if show_pbar:
      try:
         from m_lib.pbar.tty_pbar import ttyProgressBar
      except ImportError:
         show_pbar = 0

   from m_lib.flog import makelog, openlog
   if only_errors:
      log = openlog("check.log")
      log("chk_urls restarted for errors")
      if report_stats:
         print("chk_urls restarted for errors")
   else:
      log = makelog("check.log")
      log("check_urls started")
      if report_stats:
         print("   check_urls: normal start")

   try:
      from storage import storage as storage_factory
      bookmarks_storage = storage_factory()

      from robots import robot as robot_factory
      url_robot = robot_factory(tempfname, log)

      urls_no = 0
      object_count = 0
      size = 0

      checked = {}  # href -> index in root_folder.linear of the checked bookmark
      rcode = 1

      try:
         if report_stats:
            sys.stdout.write("Loading %s: " % bookmarks_storage.filename)
            sys.stdout.flush()

         root_folder = bookmarks_storage.load()
         from bkmk_objects import make_linear, break_tree
         make_linear(root_folder)
         objects = len(root_folder.linear)

         if report_stats:
            print("Ok")

         if report_stats:
            if only_errors:
               s = "Rechecking errors: "
            else:
               s = "Checking: "
            sys.stdout.write(s)
            sys.stdout.flush()

         if show_pbar:
            pbar = ttyProgressBar(0, objects)

         for object_no, bookmark in enumerate(root_folder.linear):
            if show_pbar:
               pbar.display(object_no+1)

            object_count = object_count + 1

            if not bookmark.isBookmark:
               continue

            if only_errors:
               if not hasattr(bookmark, "error"):
                  continue
               delattr(bookmark, "error")

            if bookmark.href in checked:
               log("Already checked %s" % bookmark.href)
               first_seen = root_folder.linear[checked[bookmark.href]]
               _copy_check_results(first_seen, bookmark)
               continue

            log("Checking %s" % bookmark.href)
            rcode = url_robot.check_url(bookmark)

            if not rcode:
               log("Interrupted by user (^C)")
               break

            checked[bookmark.href] = object_no
            urls_no = urls_no + 1
            try:
               size = size + int(bookmark.size)
            except (AttributeError, TypeError, ValueError):
               pass # Some object does not have a size :(
      finally:
         # Stop the robot whether the loop finished, was interrupted or failed.
         url_robot.stop()

      if show_pbar:
         del pbar

      if report_stats:
         print("Ok")
         print("%s objects passed" % object_count)
         print("%s URLs checked" % urls_no)
         print("%s bytes eaten" % size)

      break_tree(root_folder.linear)
      bookmarks_storage.store(root_folder)

      if rcode:
         log("check_urls finished ok")
   finally:
      log.close()

      # Best-effort removal: the temporary file may never have been created.
      try:
         os.unlink(tempfname)
      except os.error:
         pass
156
157
# Start the check only when this file is executed as a script.
if "__main__" == __name__:
   run()