]> git.phdru.name Git - bookmarks_db.git/blob - check_urls.py
Style: Fix flake8 E401 multiple imports on one line
[bookmarks_db.git] / check_urls.py
1 #! /usr/bin/env python
2 """Robot interface - check URLs from bookmarks database
3
4 This file is a part of Bookmarks database and Internet robot.
5 """
6
7 from __future__ import print_function
8 import os
9 import sys
10
11
# Module metadata: author, copyright holder and license.
__author__ = "Oleg Broytman <phd@phdru.name>"
__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
__license__ = "GNU GPL"
15
16
def run():
    """Check the URLs of all bookmarks in the database and store the results.

    Command-line options (taken from ``sys.argv``):

    -i  do not show the progress bar
    -s  do not print the banner, progress messages and statistics
    -e  recheck only bookmarks that have an ``error`` attribute; the log
        is obtained with ``openlog("check.log")`` instead of
        ``makelog("check.log")``

    Positional arguments are not accepted: a usage message is written to
    stderr and the process exits with status 1.  An unknown option makes
    ``getopt`` raise ``getopt.GetoptError``.

    The bookmarks tree is loaded from the storage, every bookmark is passed
    to the robot's ``check_url``, and the tree is written back afterwards -
    also when the check was interrupted.
    """
    from getopt import getopt
    optlist, args = getopt(sys.argv[1:], "ise")

    show_pbar = 1
    report_stats = 1
    only_errors = 0

    for opt, _value in optlist:
        if opt == '-i':
            show_pbar = 0
        elif opt == '-s':
            report_stats = 0
        elif opt == '-e':
            only_errors = 1

    if report_stats:
        print("Broytman check_urls, Copyright (C) 1997-2023 PhiloSoft Design")

    if args:
        sys.stderr.write("check_urls: too many arguments\n")
        sys.stderr.write("Usage: check_urls [-ise]\n")
        sys.exit(1)

    # A progress bar only makes sense on an interactive terminal.
    if show_pbar:
        show_pbar = sys.stderr.isatty()

    # The progress bar module is optional: silently go without it.
    if show_pbar:
        try:
            from m_lib.pbar.tty_pbar import ttyProgressBar
        except ImportError:
            show_pbar = 0

    from m_lib.flog import makelog, openlog
    if only_errors:
        log = openlog("check.log")
        log("chk_urls restarted for errors")
        if report_stats:
            print("chk_urls restarted for errors")
    else:
        log = makelog("check.log")
        log("check_urls started")
        if report_stats:
            print("   check_urls: normal start")

    from storage import storage
    storage = storage()

    from robots import robot
    robot = robot(log)

    if report_stats:
        sys.stdout.write("Loading %s: " % storage.filename)
        sys.stdout.flush()

    root_folder = storage.load()
    from bkmk_objects import make_linear, break_tree
    make_linear(root_folder)
    objects = len(root_folder.linear)

    if report_stats:
        print("Ok")

    if report_stats:
        if only_errors:
            s = "Rechecking errors: "
        else:
            s = "Checking: "
        sys.stdout.write(s)
        sys.stdout.flush()

    if show_pbar:
        pbar = ttyProgressBar(0, objects)

    urls_no = 0
    object_count = 0
    size = 0

    # Maps an already checked href to the index (in root_folder.linear)
    # of the first bookmark that was checked with that href.
    checked = {}
    # Result of the last robot.check_url() call; a false value means
    # the check was interrupted.
    rcode = 1

    # Attributes copied from the first checked bookmark to its duplicates.
    copied_attrs = (
        "last_visit", "last_modified",
        "error", "no_error", "moved", "size", "md5", "real_title",
        "last_tested", "test_time", "icon", "charset",
    )

    for object_no in range(objects):
        if show_pbar:
            pbar.display(object_no+1)

        bookmark = root_folder.linear[object_no]
        object_count += 1

        if not bookmark.isBookmark:
            continue

        href = bookmark.href
        if (href.startswith('place:')  # Firefox SmartBookmarks
                or '%s' in href):  # Bookmark with keyword
            log("Skipped %s" % href)
            continue

        if only_errors:
            # Recheck only bookmarks marked with an error;
            # the old mark is removed before the new check.
            if hasattr(bookmark, "error"):
                delattr(bookmark, "error")
            else:
                continue

        # "in" instead of dict.has_key(): has_key() does not exist
        # in Python 3.
        if href in checked:
            log("Already checked %s" % href)
            first_checked = root_folder.linear[checked[href]]
            for attr_name in copied_attrs:
                if hasattr(first_checked, attr_name):
                    setattr(bookmark, attr_name,
                            getattr(first_checked, attr_name))
        else:
            log("Checking %s" % href)
            rcode = robot.check_url(bookmark)

            if not rcode:
                log("Interrupted by user (^C)")
                break

            checked[href] = object_no
            urls_no += 1
            try:
                size += int(bookmark.size)
            except (AttributeError, TypeError, ValueError):
                pass  # Some object does not have a size :(
    robot.stop()

    # NOTE(review): presumably dropping the last reference lets the
    # progress bar clean up the terminal line - confirm in m_lib.
    if show_pbar:
        del pbar

    if report_stats:
        print("Ok")
        print(object_count, "objects passed")
        print(urls_no, "URLs checked")
        print(size, "bytes eaten")

    # Results are stored even after an interruption.
    break_tree(root_folder.linear)
    storage.store(root_folder)

    if rcode:
        log("check_urls finished ok")
    log.close()
163
164
# Run the checker only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    run()