-#! /usr/bin/env python
-"""
- Robot interface - check URLs from bookmarks database
+#! /usr/bin/env python3
+"""Robot interface - check URLs from the command line
- Written by BroytMann. Copyright (C) 2000-2007 PhiloSoft Design.
+This file is a part of Bookmarks database and Internet robot.
"""
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2010-2024 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+import sys
+
+from bkmk_objects import Bookmark
+from Writers.bkmk_wflad import strftime
+
+try:
+ import httplib
+except ImportError:
+ pass
+else:
+ HTTP = httplib.HTTP
+
+ class MyHTTP(HTTP):
+ def _setup(self, conn):
+ HTTP._setup(self, conn)
+ self.set_debuglevel(1)
-import sys, os, tempfile
-tempfname = "check_urls" + tempfile.gettempprefix() + "tmp"
+ httplib.HTTP = MyHTTP
def run():
- from getopt import getopt
- optlist, args = getopt(sys.argv[1:], "ise")
-
- show_pbar = 1
- report_stats = 1
- only_errors = 0
-
- for _opt, _arg in optlist:
- if _opt == '-i':
- show_pbar = 0
- if _opt == '-s':
- report_stats = 0
- if _opt == '-e':
- only_errors = 1
- try:
- del _opt, _arg
- except NameError:
- pass
-
- if report_stats:
- print "BroytMann check_urls, Copyright (C) 1997-2007 PhiloSoft Design"
-
- if args:
- sys.stderr.write("check_urls: too many arguments\n")
- sys.stderr.write("Usage: check_urls [-ise]\n")
- sys.exit(1)
-
- if show_pbar:
- show_pbar = sys.stderr.isatty()
-
- if show_pbar:
- try:
- from m_lib.pbar.tty_pbar import ttyProgressBar
- except ImportError:
- show_pbar = 0
-
- from m_lib.flog import makelog, openlog
- if only_errors:
- log = openlog("check.log")
- log("chk_urls restarted for errors")
- if report_stats:
- print "chk_urls restarted for errors"
- else:
- log = makelog("check.log")
- log("check_urls started")
- if report_stats:
- print " check_urls: normal start"
-
- from storage import storage
- storage = storage()
-
- from robots import robot
- robot = robot(tempfname, log)
-
- if report_stats:
- sys.stdout.write("Loading %s: " % storage.filename)
- sys.stdout.flush()
-
- root_folder = storage.load()
- from bkmk_objects import make_linear, break_tree
- make_linear(root_folder)
- objects = len(root_folder.linear)
-
- if report_stats:
- print "Ok"
-
- if report_stats:
- if only_errors:
- s = "Rechecking errors: "
- else:
- s = "Checking: "
- sys.stdout.write(s)
- sys.stdout.flush()
-
- if show_pbar:
- pbar = ttyProgressBar(0, objects)
-
- urls_no = 0
- object_count = 0
- size = 0
-
- checked = {}
- rcode = 1
-
- for object_no in range(objects):
- if show_pbar:
- pbar.display(object_no+1)
-
- object = root_folder.linear[object_no]
- object_count = object_count + 1
-
- if object.isBookmark:
- if only_errors:
- if hasattr(object, "error"):
- delattr(object, "error")
- else:
- continue
-
- if checked.has_key(object.href):
- log("Already checked %s" % object.href)
- old_object = root_folder.linear[checked[object.href]]
- for attr_name in ("last_visit", "last_modified",
- "error", "no_error", "moved", "size", "md5", "real_title",
- "last_tested", "test_time"):
- if hasattr(old_object, attr_name):
- setattr(object, attr_name, getattr(old_object, attr_name))
- else:
- log("Checking %s" % object.href)
- rcode = robot.check_url(object)
-
- if rcode:
- checked[object.href] = object_no
- urls_no = urls_no + 1
- try:
- size = size + int(object.size)
- except (AttributeError, TypeError, ValueError):
- pass # Some object does not have a size :(
- else:
- log("Interrupted by user (^C)")
- break
- robot.stop()
-
- if show_pbar:
- del pbar
-
- if report_stats:
- print "Ok"
- print object_count, "objects passed"
- print urls_no, "URLs checked"
- print size, "bytes eaten"
-
- break_tree(root_folder.linear)
- storage.store(root_folder)
-
- if rcode:
- log("check_urls finished ok")
- log.close()
-
- try:
- os.unlink(tempfname)
- except os.error:
- pass
+    """Check each URL given on the command line and print the result.
+
+    Prints a banner, then requires at least one URL in ``sys.argv[1:]``;
+    otherwise a usage line is written to stderr and the process exits
+    with status 1.  A log created with ``makelog("check.log")`` is handed
+    to the robot.  Every URL is wrapped in a parentless ``Bookmark`` and
+    passed to ``robot.check_url()``; its return code is printed, followed
+    by ``bookmark.error`` when the robot set one, or by a summary of the
+    bookmark's attributes otherwise.
+
+    The robot is stopped and the log is closed even when a check raises
+    or the user interrupts the run.
+    """
+    print("Broytman check_urls, Copyright (C) 2010-2024 PhiloSoft Design")
+
+    if len(sys.argv) < 2:
+        sys.stderr.write("Usage: check_urls.py url1 [url2...]\n")
+        sys.exit(1)
+
+    from m_lib.flog import makelog
+    log = makelog("check.log")
+    # From here on the log must be closed whatever happens below.
+    try:
+        log.outfile.reconfigure(encoding='utf-8')
+        sys.stdout.reconfigure(encoding='utf-8')
+
+        from robots import robot
+        robot = robot(log)
+        # Once the robot exists it must be stopped on every exit path,
+        # including exceptions raised by check_url() and Ctrl-C.
+        try:
+            for url in sys.argv[1:]:
+                bookmark = Bookmark(href=url, add_date=None)
+                bookmark.parent = None
+
+                rcode = robot.check_url(bookmark)
+                print("check_urls: %s" % rcode)
+
+                if hasattr(bookmark, 'error'):
+                    print(bookmark.error)
+
+                else:
+                    # The template lines are runtime output; keep verbatim.
+                    print("""\
+ Title: %s
+ URL: %s
+ LastModified: %s
+ Moved: %s
+ Size: %s
+ Md5: %s
+ IconURI: %s
+ Icon: %s
+ Charset: %s
+ """ % (
+                        # Prefer the title fetched by the robot, if any.
+                        getattr(bookmark, 'real_title', None)
+                        or getattr(bookmark, 'title', None),
+                        bookmark.href,
+                        strftime(bookmark.last_modified),
+                        getattr(bookmark, 'moved', None),
+                        getattr(bookmark, 'size', None),
+                        getattr(bookmark, 'md5', None),
+                        bookmark.icon_href, bookmark.icon, bookmark.charset,
+                    )
+                    )
+
+        finally:
+            robot.stop()
+    finally:
+        log.close()
if __name__ == '__main__':
- run()
+ run()