git.phdru.name Git - bookmarks_db.git/blob - check_urls_db.py
Fix(Robot): Stop splitting and un-splitting URLs
[bookmarks_db.git] / check_urls_db.py
1 #! /usr/bin/env python3
2 """Robot interface - check URLs from bookmarks database
3
4 This file is a part of Bookmarks database and Internet robot.
5 """
6
7 from __future__ import print_function
8 import sys
9
10
11 __author__ = "Oleg Broytman <phd@phdru.name>"
12 __copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
13 __license__ = "GNU GPL"
14
15
def _usage_error(message):
    """Report a command-line error on stderr and exit with status 1."""
    sys.stderr.write("check_urls_db: %s\n" % message)
    sys.stderr.write("Usage: check_urls_db [-ise]\n")
    sys.exit(1)


def _copy_check_results(source, target):
    """Copy the check-result attributes that `source` has onto `target`.

    Used for a bookmark whose URL was already checked during this run,
    so the same URL is not fetched twice.
    """
    for attr_name in (
        "last_visit", "last_modified",
        "error", "no_error", "moved", "size", "md5", "real_title",
        "last_tested", "test_time", "icon", "charset",
    ):
        if hasattr(source, attr_name):
            setattr(target, attr_name, getattr(source, attr_name))


def run():
    """Check every bookmark URL in the bookmarks database.

    Command-line options (read from ``sys.argv``):

    -i  do not show the progress bar;
    -s  do not print the banner and the statistics;
    -e  recheck only the bookmarks that carry an ``error`` attribute.

    Positional arguments are not accepted. An unknown option or an extra
    argument prints a usage message to stderr and exits with status 1.
    """
    from getopt import GetoptError, getopt
    try:
        optlist, args = getopt(sys.argv[1:], "ise")
    except GetoptError as exc:
        # An unknown option is a usage error, not a reason for a traceback.
        _usage_error(str(exc))

    show_pbar = True
    report_stats = True
    only_errors = False

    for opt, _value in optlist:
        if opt == '-i':
            show_pbar = False
        elif opt == '-s':
            report_stats = False
        elif opt == '-e':
            only_errors = True

    if report_stats:
        print("Broytman check_urls_db, "
              "Copyright (C) 1997-2024 PhiloSoft Design")

    if args:
        _usage_error("too many arguments")

    # The progress bar is only useful on an interactive terminal.
    if show_pbar:
        show_pbar = sys.stderr.isatty()

    if show_pbar:
        try:
            from m_lib.pbar.tty_pbar import ttyProgressBar
        except ImportError:
            # The progress bar is optional; run without it.
            show_pbar = False

    from m_lib.flog import makelog, openlog
    if only_errors:
        # NOTE(review): openlog presumably appends to the existing log
        # while makelog starts a new one -- confirm against m_lib.flog.
        log = openlog("check.log")
        log("chk_urls restarted for errors")
        if report_stats:
            print("chk_urls restarted for errors")
    else:
        log = makelog("check.log")
        log("check_urls_db started")
        if report_stats:
            print("   check_urls_db: normal start")
    log.outfile.reconfigure(encoding='utf-8')

    from storage import storage
    storage = storage()

    from robots import robot
    robot = robot(log)

    if report_stats:
        sys.stdout.write("Loading %s: " % storage.filename)
        sys.stdout.flush()

    root_folder = storage.load()
    from bkmk_objects import make_linear, break_tree
    make_linear(root_folder)
    items = root_folder.linear

    if report_stats:
        print("Ok")
        if only_errors:
            sys.stdout.write("Rechecking errors: ")
        else:
            sys.stdout.write("Checking: ")
        sys.stdout.flush()

    if show_pbar:
        pbar = ttyProgressBar(0, len(items))

    urls_no = 0
    object_count = 0
    size = 0

    checked = {}  # href -> the first object checked with that href
    rcode = 1

    try:
        for item_no, item in enumerate(items, 1):
            if show_pbar:
                pbar.display(item_no)

            object_count += 1

            if not item.isBookmark:
                continue

            href = item.href
            if (href.startswith('place:')  # Firefox SmartBookmarks
                    or '%s' in href):  # Bookmark with keyword
                log("Skipped %s" % href)
                continue

            if only_errors:
                # Recheck only bookmarks that failed before; drop the old
                # error so that the new check starts clean.
                if not hasattr(item, "error"):
                    continue
                delattr(item, "error")

            if href in checked:
                log("Already checked %s" % href)
                _copy_check_results(checked[href], item)
                continue

            log("Checking %s" % href)
            rcode = robot.check_url(item)

            if not rcode:
                # A false result from check_url is treated as a user
                # interrupt: stop checking but still save what was done.
                log("Interrupted by user (^C)")
                break

            checked[href] = item
            urls_no += 1
            try:
                size += int(item.size)
            except (AttributeError, TypeError, ValueError):
                pass  # Some object does not have a size :(
    finally:
        # Stop the robot even when a check raised unexpectedly.
        robot.stop()

    if show_pbar:
        # Drop the progress bar before printing the statistics.
        del pbar

    if report_stats:
        print("Ok")
        print(object_count, "objects passed")
        print(urls_no, "URLs checked")
        print(size, "bytes eaten")

    break_tree(root_folder.linear)
    storage.store(root_folder)

    if rcode:
        log("check_urls_db finished ok")
    log.close()
165
166
# Script entry point: run the URL check only when executed directly.
if __name__ == "__main__":
    run()