]> git.phdru.name Git - bookmarks_db.git/blob - check_urls.py
Update TODO: Prepare for Python 3
[bookmarks_db.git] / check_urls.py
1 #! /usr/bin/env python
2 """Robot interface - check URLs from bookmarks database
3
4 This file is a part of Bookmarks database and Internet robot.
5 """
6
7 from __future__ import print_function
8 import sys, os
9
10
11 __author__ = "Oleg Broytman <phd@phdru.name>"
12 __copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
13 __license__ = "GNU GPL"
14
15
def run():
   """Check the URLs of all bookmarks in the bookmarks database.

   Command-line options (taken from ``sys.argv``):

   -i   do not show the progress bar
   -s   do not print the banner, progress messages and statistics to stdout
   -e   recheck only the bookmarks that already have an ``error`` attribute

   The progress bar is shown only when stderr is a terminal and
   ``m_lib.pbar.tty_pbar`` can be imported.  Every step is reported to the
   ``check.log`` log (opened with ``openlog`` for ``-e``, created with
   ``makelog`` otherwise).  The bookmarks are stored back to the storage
   after the loop, including when the robot reported an interruption.

   Exits with status 1 if an unknown option or any positional argument
   is given.
   """
   from getopt import getopt, GetoptError

   usage = "Usage: check_urls [-ise]\n"
   try:
      optlist, args = getopt(sys.argv[1:], "ise")
   except GetoptError as exc:
      # Report a bad option the same way as surplus arguments
      # instead of letting the traceback reach the user.
      sys.stderr.write("check_urls: %s\n" % exc)
      sys.stderr.write(usage)
      sys.exit(1)

   options = [opt for opt, _value in optlist]
   show_pbar = '-i' not in options
   report_stats = '-s' not in options
   only_errors = '-e' in options

   if report_stats:
      print("Broytman check_urls, Copyright (C) 1997-2017 PhiloSoft Design")

   if args:
      sys.stderr.write("check_urls: too many arguments\n")
      sys.stderr.write(usage)
      sys.exit(1)

   # The progress bar is drawn only on an interactive stderr...
   if show_pbar:
      show_pbar = sys.stderr.isatty()

   # ...and only if the optional progress bar module is available.
   if show_pbar:
      try:
         from m_lib.pbar.tty_pbar import ttyProgressBar
      except ImportError:
         show_pbar = 0

   from m_lib.flog import makelog, openlog
   if only_errors:
      log = openlog("check.log")
      log("chk_urls restarted for errors")
      if report_stats:
         print("chk_urls restarted for errors")
   else:
      log = makelog("check.log")
      log("check_urls started")
      if report_stats:
         print("   check_urls: normal start")

   from storage import storage
   storage = storage()

   from robots import robot
   robot = robot(log)

   if report_stats:
      sys.stdout.write("Loading %s: " % storage.filename)
      sys.stdout.flush()

   root_folder = storage.load()
   from bkmk_objects import make_linear, break_tree
   make_linear(root_folder)
   linear = root_folder.linear
   objects = len(linear)

   if report_stats:
      print("Ok")
      if only_errors:
         s = "Rechecking errors: "
      else:
         s = "Checking: "
      sys.stdout.write(s)
      sys.stdout.flush()

   if show_pbar:
      pbar = ttyProgressBar(0, objects)

   # Attributes copied from an already checked bookmark to its duplicates.
   result_attrs = ("last_visit", "last_modified",
         "error", "no_error", "moved", "size", "md5", "real_title",
         "last_tested", "test_time", "icon", "charset")

   urls_no = 0
   object_count = 0
   size = 0

   checked = {}  # href -> index in `linear` of the bookmark checked first
   rcode = 1  # stays true unless the robot reports an interruption

   for object_no, bookmark in enumerate(linear):
      if show_pbar:
         pbar.display(object_no + 1)

      object_count += 1

      if not bookmark.isBookmark:
         continue

      href = bookmark.href
      if (href.startswith('place:')  # Firefox SmartBookmarks
            or '%s' in href):  # Bookmark with keyword
         log("Skipped %s" % href)
         continue

      if only_errors:
         # Recheck only bookmarks that failed before; drop the old error
         # so that the new check starts from a clean state.
         if not hasattr(bookmark, "error"):
            continue
         delattr(bookmark, "error")

      if href in checked:
         # The same URL was checked earlier in this run - reuse its results.
         log("Already checked %s" % href)
         old_object = linear[checked[href]]
         for attr_name in result_attrs:
            if hasattr(old_object, attr_name):
               setattr(bookmark, attr_name, getattr(old_object, attr_name))
         continue

      log("Checking %s" % href)
      rcode = robot.check_url(bookmark)

      if not rcode:
         log("Interrupted by user (^C)")
         break

      checked[href] = object_no
      urls_no += 1
      try:
         size += int(bookmark.size)
      except (AttributeError, TypeError, ValueError):
         pass  # Some object does not have a size :(

   robot.stop()

   if show_pbar:
      # NOTE(review): presumably dropping the last reference lets the
      # progress bar clean up its terminal line - confirm in m_lib.
      del pbar

   if report_stats:
      print("Ok")
      print(object_count, "objects passed")
      print(urls_no, "URLs checked")
      print(size, "bytes eaten")

   break_tree(root_folder.linear)
   storage.store(root_folder)

   if rcode:
      log("check_urls finished ok")
   log.close()
160
161
# Start the URL checker only when this file is executed as a script.
if __name__ == "__main__":
   run()