#! /usr/bin/env python3
"""Robot interface - check URLs from the command line

This file is a part of Bookmarks database and Internet robot.
"""

# NOTE(review): this text was reconstructed from the "+"/context side of a
# whitespace-collapsed `git diff` of check_urls.py (index 4081637..ecf8ad6).
# Indentation and blank lines were re-derived from Python syntax; the removed
# ("-") side of the diff is the old implementation and is not reproduced.

# NOTE(review): the string below ends in a space in the extracted text;
# something (e.g. an address in angle brackets) may have been stripped by the
# extraction - confirm against the upstream repository.
__author__ = "Oleg Broytman "
__copyright__ = "Copyright (C) 2010-2024 PhiloSoft Design"
__license__ = "GNU GPL"

import sys

from bkmk_objects import Bookmark
from Writers.bkmk_wflad import strftime

# Optional debugging hook.  ``httplib`` is the Python 2 name of the HTTP
# client module; when the import fails (as it does on Python 3) the hook is
# silently skipped, which is the intended best-effort behaviour.
try:
    import httplib
except ImportError:
    pass
else:
    HTTP = httplib.HTTP

    class MyHTTP(HTTP):
        """``httplib.HTTP`` subclass that enables debug level 1 on setup."""

        def _setup(self, conn):
            HTTP._setup(self, conn)
            self.set_debuglevel(1)

    # Replace the library class so every user of httplib.HTTP gets the
    # debugging variant.
    httplib.HTTP = MyHTTP


def run():
    """Check every URL given on the command line and print the outcome.

    Prints a copyright banner, then requires at least one URL argument;
    with none it writes a usage line to stderr and exits with status 1.
    A log is opened with ``m_lib.flog.makelog("check.log")`` and both the
    log's output file and ``sys.stdout`` are switched to UTF-8.

    For each URL a parentless ``Bookmark`` is created and passed to
    ``robot.check_url()``; the return code is printed, followed by either
    ``bookmark.error`` (when the robot set it) or a short report of the
    attributes found on the bookmark.

    The robot is stopped and the log closed even if a check raises.
    """
    print("Broytman check_urls, Copyright (C) 2010-2024 PhiloSoft Design")

    if len(sys.argv) < 2:
        sys.stderr.write("Usage: check_urls.py url1 [url2...]\n")
        sys.exit(1)

    from m_lib.flog import makelog
    log = makelog("check.log")
    try:
        log.outfile.reconfigure(encoding='utf-8')
        sys.stdout.reconfigure(encoding='utf-8')

        from robots import robot
        # Keep the instance under its own name instead of rebinding the
        # imported class name.
        checker = robot(log)
        try:
            for url in sys.argv[1:]:
                bookmark = Bookmark(href=url, add_date=None)
                # The bookmark is checked stand-alone, outside any folder.
                bookmark.parent = None

                rcode = checker.check_url(bookmark)
                print("check_urls: %s" % rcode)

                if hasattr(bookmark, 'error'):
                    print(bookmark.error)

                else:
                    # NOTE(review): the leading whitespace of the template
                    # lines was lost in the collapsed diff (only "some
                    # indentation" is recoverable); 4 spaces is assumed -
                    # confirm against upstream.
                    print("""\
    Title: %s
    URL: %s
    LastModified: %s
    Moved: %s
    Size: %s
    Md5: %s
    IconURI: %s
    Icon: %s
    Charset: %s
    """ % (
                        # Prefer the title reported by the robot, fall back
                        # to the bookmark's own title.
                        getattr(bookmark, 'real_title', None)
                        or getattr(bookmark, 'title', None),
                        bookmark.href,
                        strftime(bookmark.last_modified),
                        getattr(bookmark, 'moved', None),
                        getattr(bookmark, 'size', None),
                        getattr(bookmark, 'md5', None),
                        bookmark.icon_href, bookmark.icon, bookmark.charset,
                    ))
        finally:
            # Always shut the robot down, even when a check raised.
            checker.stop()
    finally:
        # Always close the log so buffered entries are not lost.
        log.close()


if __name__ == '__main__':
    run()