]> git.phdru.name Git - bookmarks_db.git/blob - check_urls.py
Fix(Robot): Stop splitting and un-splitting URLs
[bookmarks_db.git] / check_urls.py
1 #! /usr/bin/env python
2 """Robot interface - check URLs from bookmarks database
3
4 This file is a part of Bookmarks database and Internet robot.
5 """
6
7 from __future__ import print_function
8 import sys, os
9
10
11 __author__ = "Oleg Broytman <phd@phdru.name>"
12 __copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
13 __license__ = "GNU GPL"
14
15
def run():
    """Check the URLs of all bookmarks in the database and store the results.

    Command-line options (taken from ``sys.argv``):

    -i  do not show the progress bar
    -s  do not print the banner, progress messages and statistics
    -e  recheck only the bookmarks that already carry an ``error`` attribute

    Positional arguments are not accepted: a usage message is written to
    stderr and the process exits with status 1.  Unknown options make
    ``getopt`` raise ``getopt.GetoptError``.

    The run is logged to ``check.log`` (appended to with -e, created anew
    otherwise -- presumably; see m_lib.flog.openlog/makelog to confirm).
    """
    from getopt import getopt
    optlist, args = getopt(sys.argv[1:], "ise")

    show_pbar = True
    report_stats = True
    only_errors = False

    for opt, _value in optlist:
        if opt == '-i':
            show_pbar = False
        elif opt == '-s':
            report_stats = False
        elif opt == '-e':
            only_errors = True

    if report_stats:
        print("Broytman check_urls, Copyright (C) 1997-2017 PhiloSoft Design")

    if args:
        sys.stderr.write("check_urls: too many arguments\n")
        sys.stderr.write("Usage: check_urls [-ise]\n")
        sys.exit(1)

    # The progress bar only makes sense on an interactive terminal.
    if show_pbar:
        show_pbar = sys.stderr.isatty()

    if show_pbar:
        try:
            from m_lib.pbar.tty_pbar import ttyProgressBar
        except ImportError:
            # The progress bar is optional; run silently without it.
            show_pbar = False

    from m_lib.flog import makelog, openlog
    if only_errors:
        log = openlog("check.log")
        log("chk_urls restarted for errors")
        if report_stats:
            print("chk_urls restarted for errors")
    else:
        log = makelog("check.log")
        log("check_urls started")
        if report_stats:
            print("   check_urls: normal start")

    from storage import storage
    storage = storage()

    from robots import robot
    robot = robot(log)

    if report_stats:
        sys.stdout.write("Loading %s: " % storage.filename)
        sys.stdout.flush()

    root_folder = storage.load()
    from bkmk_objects import make_linear, break_tree
    make_linear(root_folder)
    linear = root_folder.linear
    objects = len(linear)

    if report_stats:
        print("Ok")
        if only_errors:
            sys.stdout.write("Rechecking errors: ")
        else:
            sys.stdout.write("Checking: ")
        sys.stdout.flush()

    if show_pbar:
        pbar = ttyProgressBar(0, objects)

    # Attributes copied from the first checked bookmark to any later
    # bookmark that has the same href, so a URL is fetched only once.
    copied_attrs = (
        "last_visit", "last_modified",
        "error", "no_error", "moved", "size", "md5", "real_title",
        "last_tested", "test_time", "icon", "charset",
    )

    urls_no = 0
    object_count = 0
    size = 0

    checked = {}  # href -> index in `linear` of the bookmark checked first
    rcode = 1  # last result of robot.check_url(); false means interrupted

    try:
        for item_no, item in enumerate(linear):
            if show_pbar:
                pbar.display(item_no + 1)

            object_count += 1

            if not item.isBookmark:
                continue

            href = item.href
            if (href.startswith('place:')  # Firefox SmartBookmarks
                    or '%s' in href):  # Bookmark with keyword
                log("Skipped %s" % href)
                continue

            if only_errors:
                if not hasattr(item, "error"):
                    continue
                # Drop the stale error before rechecking.
                delattr(item, "error")

            # `in` instead of dict.has_key(), which does not exist in
            # Python 3.
            if href in checked:
                log("Already checked %s" % href)
                first = linear[checked[href]]
                for attr_name in copied_attrs:
                    if hasattr(first, attr_name):
                        setattr(item, attr_name, getattr(first, attr_name))
                continue

            log("Checking %s" % href)
            rcode = robot.check_url(item)

            if not rcode:
                log("Interrupted by user (^C)")
                break

            checked[href] = item_no
            urls_no += 1
            try:
                size += int(item.size)
            except (AttributeError, TypeError, ValueError):
                pass  # Some object does not have a size :(
    finally:
        # Stop the robot even when checking fails with an exception.
        robot.stop()

    if show_pbar:
        del pbar

    if report_stats:
        print("Ok")
        print(object_count, "objects passed")
        print(urls_no, "URLs checked")
        print(size, "bytes eaten")

    break_tree(linear)
    storage.store(root_folder)

    if rcode:
        log("check_urls finished ok")
    log.close()
160
161
# Start the URL check only when executed as a script, not on import.
if __name__ == "__main__":
    run()