git.phdru.name Git - bookmarks_db.git/blob - check_urls.py
Fix(Py3): Reconfigure logs to write in UTF-8
[bookmarks_db.git] / check_urls.py
1 #! /usr/bin/env python3
2 """Robot interface - check URLs from bookmarks database
3
4 This file is a part of Bookmarks database and Internet robot.
5 """
6
7 from __future__ import print_function
8 import sys
9
10
11 __author__ = "Oleg Broytman <phd@phdru.name>"
12 __copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
13 __license__ = "GNU GPL"
14
15
# Attributes holding the outcome of a URL check; they are copied from an
# already-checked bookmark onto later bookmarks that share the same href.
_CHECK_RESULT_ATTRS = (
    "last_visit", "last_modified",
    "error", "no_error", "moved", "size", "md5", "real_title",
    "last_tested", "test_time", "icon", "charset",
)


def _usage_error(message):
    """Write *message* and the usage line to stderr, then exit with status 1."""
    sys.stderr.write("check_urls: %s\n" % message)
    sys.stderr.write("Usage: check_urls [-ise]\n")
    sys.exit(1)


def _copy_check_results(source, target):
    """Copy the check-result attributes present on *source* onto *target*.

    Attributes that *source* does not have are left untouched on *target*.
    """
    for attr_name in _CHECK_RESULT_ATTRS:
        if hasattr(source, attr_name):
            setattr(target, attr_name, getattr(source, attr_name))


def run():
    """Check the URLs of all bookmarks in the database and store the results.

    Options are read from ``sys.argv``:

    -i  do not show the progress bar
    -s  do not print the banner, progress messages and statistics
    -e  recheck only the bookmarks that have an ``error`` attribute

    An unknown option or any positional argument is reported on stderr
    together with the usage line, and the process exits with status 1.
    """
    from getopt import GetoptError, getopt
    try:
        optlist, args = getopt(sys.argv[1:], "ise")
    except GetoptError as err:
        # Report a bad option the same way as extra arguments instead of
        # letting the exception escape as a traceback.
        _usage_error(err)

    show_pbar = True
    report_stats = True
    only_errors = False

    for opt, _value in optlist:
        if opt == '-i':
            show_pbar = False
        elif opt == '-s':
            report_stats = False
        elif opt == '-e':
            only_errors = True

    if report_stats:
        # NOTE(review): the module-level __copyright__ says 2000-2023 while
        # this banner says 1997-2023 - confirm which range is intended.
        print("Broytman check_urls, Copyright (C) 1997-2023 PhiloSoft Design")

    if args:
        _usage_error("too many arguments")

    # The progress bar is only useful on an interactive terminal.
    if show_pbar:
        show_pbar = sys.stderr.isatty()

    if show_pbar:
        try:
            from m_lib.pbar.tty_pbar import ttyProgressBar
        except ImportError:
            # The progress bar is optional; run without it.
            show_pbar = False

    # NOTE(review): presumably openlog continues the existing log file while
    # makelog starts a new one - confirm against m_lib.flog.
    from m_lib.flog import makelog, openlog
    if only_errors:
        log = openlog("check.log")
        log("chk_urls restarted for errors")
        if report_stats:
            print("chk_urls restarted for errors")
    else:
        log = makelog("check.log")
        log("check_urls started")
        if report_stats:
            print("   check_urls: normal start")
    # Write the log in UTF-8 whatever encoding the file was opened with.
    log.outfile.reconfigure(encoding='utf-8')

    from storage import storage as storage_factory
    storage = storage_factory()

    from robots import robot as robot_factory
    robot = robot_factory(log)

    if report_stats:
        sys.stdout.write("Loading %s: " % storage.filename)
        sys.stdout.flush()

    root_folder = storage.load()
    from bkmk_objects import make_linear, break_tree
    make_linear(root_folder)  # provides root_folder.linear, iterated below
    objects = len(root_folder.linear)

    if report_stats:
        print("Ok")

    if report_stats:
        if only_errors:
            s = "Rechecking errors: "
        else:
            s = "Checking: "
        sys.stdout.write(s)
        sys.stdout.flush()

    if show_pbar:
        pbar = ttyProgressBar(0, objects)

    urls_no = 0       # URLs actually passed to the robot
    object_count = 0  # objects visited, bookmarks or not
    size = 0          # sum of the sizes reported for the checked URLs

    checked = {}  # href -> index in root_folder.linear of the checked object
    rcode = 1     # stays true unless robot.check_url returns a false value

    for object_no, bkmk in enumerate(root_folder.linear):
        if show_pbar:
            pbar.display(object_no+1)

        object_count += 1

        if not bkmk.isBookmark:
            continue

        href = bkmk.href
        if (href.startswith('place:')  # Firefox SmartBookmarks
                or '%s' in href):  # Bookmark with keyword
            log("Skipped %s" % href)
            continue

        if only_errors:
            # Recheck only bookmarks that failed before, and clear the old
            # error first.
            if hasattr(bkmk, "error"):
                delattr(bkmk, "error")
            else:
                continue

        if href in checked:
            # Same URL as an earlier bookmark: reuse its results.
            log("Already checked %s" % href)
            _copy_check_results(root_folder.linear[checked[href]], bkmk)
            continue

        log("Checking %s" % href)
        rcode = robot.check_url(bkmk)

        if not rcode:
            log("Interrupted by user (^C)")
            break

        checked[href] = object_no
        urls_no += 1
        try:
            size += int(bkmk.size)
        except (AttributeError, TypeError, ValueError):
            pass  # Some object does not have a size :(
    robot.stop()

    if show_pbar:
        del pbar

    if report_stats:
        print("Ok")
        print(object_count, "objects passed")
        print(urls_no, "URLs checked")
        print(size, "bytes eaten")

    # The database is stored even after an interrupt, so the results
    # gathered so far are kept.
    break_tree(root_folder.linear)
    storage.store(root_folder)

    if rcode:
        log("check_urls finished ok")
    log.close()
164
165
# Script entry point: start the check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run()