#! /usr/bin/env python
"""Robot interface - check URLs from bookmarks database

This file is a part of Bookmarks database and Internet robot.
"""

from __future__ import print_function
import sys, os

__author__ = "Oleg Broytman <phd@phdru.name>"
__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
__license__ = "GNU GPL"


# Attributes copied from an already-checked bookmark onto a later bookmark
# with the same href, so that one URL is fetched only once per run.
_SHARED_ATTRS = (
    "last_visit", "last_modified",
    "error", "no_error", "moved", "size", "md5", "real_title",
    "last_tested", "test_time", "icon", "charset",
)


def _parse_options(argv):
    """Parse command-line flags.

    Returns a tuple (show_pbar, report_stats, only_errors, args) where
    args holds the leftover positional arguments.  An unknown option makes
    getopt raise its usual error, which is not caught here.
    """
    from getopt import getopt
    optlist, args = getopt(argv, "ise")

    flags = set(opt for opt, _value in optlist)
    show_pbar = '-i' not in flags
    report_stats = '-s' not in flags
    only_errors = '-e' in flags
    return show_pbar, report_stats, only_errors, args


def _is_uncheckable(href):
    """Tell whether href is a pseudo-URL that must not be fetched."""
    return (
        href.startswith('place:')  # Firefox SmartBookmarks
        or '%s' in href  # Bookmark with keyword
    )


def _copy_check_results(source, target):
    """Copy every check-result attribute that source has onto target."""
    for attr_name in _SHARED_ATTRS:
        if hasattr(source, attr_name):
            setattr(target, attr_name, getattr(source, attr_name))


def run():
    """Check every bookmark URL in the bookmarks database.

    Command-line flags (read from sys.argv):
      -i  do not show the progress bar
      -s  do not print the banner and statistics to stdout
      -e  recheck only bookmarks that carry an "error" attribute

    Any positional argument is an error: a usage message is written to
    stderr and the process exits with status 1.

    The database is loaded through the `storage` module, each bookmark is
    passed to the robot's check_url(), and the tree is stored back at the
    end.  Progress is written to the "check.log" log.
    """
    show_pbar, report_stats, only_errors, args = _parse_options(sys.argv[1:])

    if report_stats:
        print("Broytman check_urls, Copyright (C) 1997-2023 PhiloSoft Design")

    if args:
        sys.stderr.write("check_urls: too many arguments\n")
        sys.stderr.write("Usage: check_urls [-ise]\n")
        sys.exit(1)

    # A progress bar is only useful on an interactive terminal.
    if show_pbar:
        show_pbar = sys.stderr.isatty()

    if show_pbar:
        try:
            from m_lib.pbar.tty_pbar import ttyProgressBar
        except ImportError:
            # The progress bar is optional; run without it.
            show_pbar = 0

    from m_lib.flog import makelog, openlog
    if only_errors:
        # NOTE(review): presumably openlog appends to the existing log
        # while makelog starts a new one - confirm against m_lib.flog.
        log = openlog("check.log")
        log("chk_urls restarted for errors")
        if report_stats:
            print("chk_urls restarted for errors")
    else:
        log = makelog("check.log")
        log("check_urls started")
        if report_stats:
            print(" check_urls: normal start")

    from storage import storage
    storage = storage()

    from robots import robot
    robot = robot(log)

    if report_stats:
        sys.stdout.write("Loading %s: " % storage.filename)
        sys.stdout.flush()

    root_folder = storage.load()
    from bkmk_objects import make_linear, break_tree
    make_linear(root_folder)
    objects = len(root_folder.linear)

    if report_stats:
        print("Ok")

    if report_stats:
        if only_errors:
            s = "Rechecking errors: "
        else:
            s = "Checking: "
        sys.stdout.write(s)
        sys.stdout.flush()

    if show_pbar:
        pbar = ttyProgressBar(0, objects)

    urls_no = 0
    object_count = 0
    size = 0

    checked = {}  # href -> index in root_folder.linear of the checked bookmark
    rcode = 1  # stays true unless check_url() reports an interruption

    for object_no, bookmark in enumerate(root_folder.linear):
        if show_pbar:
            pbar.display(object_no + 1)

        object_count = object_count + 1

        if not bookmark.isBookmark:
            continue

        href = bookmark.href
        if _is_uncheckable(href):
            log("Skipped %s" % href)
            continue

        if only_errors:
            if hasattr(bookmark, "error"):
                # Drop the stale error so this check starts clean.
                delattr(bookmark, "error")
            else:
                continue

        # `in` instead of dict.has_key(): has_key() was removed in Python 3.
        if href in checked:
            log("Already checked %s" % href)
            _copy_check_results(root_folder.linear[checked[href]], bookmark)
        else:
            log("Checking %s" % href)
            rcode = robot.check_url(bookmark)

            if rcode:
                checked[href] = object_no
                urls_no = urls_no + 1
                try:
                    size = size + int(bookmark.size)
                except (AttributeError, TypeError, ValueError):
                    pass  # Some object does not have a size :(
            else:
                log("Interrupted by user (^C)")
                break
    robot.stop()

    if show_pbar:
        # NOTE(review): presumably dropping the reference lets the progress
        # bar clean up the terminal line - confirm in m_lib.pbar.
        del pbar

    if report_stats:
        print("Ok")
        print(object_count, "objects passed")
        print(urls_no, "URLs checked")
        print(size, "bytes eaten")

    # NOTE(review): presumably break_tree undoes the links created by
    # make_linear before the tree is stored - confirm in bkmk_objects.
    break_tree(root_folder.linear)
    storage.store(root_folder)

    if rcode:
        log("check_urls finished ok")
    log.close()


if __name__ == '__main__':
    run()