X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=check_urls.py;h=a7314f3b789ee120cfb9d239fd05d1e14d3268a5;hb=b93d1659b23fe6d39a9909f72c5c8fa44ae2bb51;hp=2e9fe83c8a2a72328a7ff7d022e27d7f1edadff0;hpb=0e76f1851882b99da63a7c8a9e4cdf0c4a48657f;p=bookmarks_db.git diff --git a/check_urls.py b/check_urls.py index 2e9fe83..a7314f3 100755 --- a/check_urls.py +++ b/check_urls.py @@ -1,159 +1,163 @@ #! /usr/bin/env python -""" - Robot interface - check URLs from bookmarks database +"""Robot interface - check URLs from bookmarks database - Written by Broytman. Copyright (C) 2000-2007 PhiloSoft Design. +This file is a part of Bookmarks database and Internet robot. """ - +from __future__ import print_function import sys, os -tempfname = "check_urls.tmp" - - -def run(): - from getopt import getopt - optlist, args = getopt(sys.argv[1:], "ise") - - show_pbar = 1 - report_stats = 1 - only_errors = 0 - - for _opt, _arg in optlist: - if _opt == '-i': - show_pbar = 0 - if _opt == '-s': - report_stats = 0 - if _opt == '-e': - only_errors = 1 - try: - del _opt, _arg - except NameError: - pass - - if report_stats: - print "Broytman check_urls, Copyright (C) 1997-2007 PhiloSoft Design" - - if args: - sys.stderr.write("check_urls: too many arguments\n") - sys.stderr.write("Usage: check_urls [-ise]\n") - sys.exit(1) - - if show_pbar: - show_pbar = sys.stderr.isatty() - - if show_pbar: - try: - from m_lib.pbar.tty_pbar import ttyProgressBar - except ImportError: - show_pbar = 0 - - from m_lib.flog import makelog, openlog - if only_errors: - log = openlog("check.log") - log("chk_urls restarted for errors") - if report_stats: - print "chk_urls restarted for errors" - else: - log = makelog("check.log") - log("check_urls started") - if report_stats: - print " check_urls: normal start" - - from storage import storage - storage = storage() - - from robots import robot - robot = robot(tempfname, log) - - if report_stats: - sys.stdout.write("Loading %s: " % storage.filename) - sys.stdout.flush() - - root_folder = storage.load() - from bkmk_objects import make_linear, break_tree - make_linear(root_folder) - objects = len(root_folder.linear) - - if report_stats: - print "Ok" - - if report_stats: - if only_errors: - s = "Rechecking errors: " - else: - s = "Checking: " - sys.stdout.write(s) - sys.stdout.flush() - - if show_pbar: - pbar = ttyProgressBar(0, objects) - - urls_no = 0 - object_count = 0 - size = 0 - - checked = {} - rcode = 1 - - for object_no in range(objects): - if show_pbar: - pbar.display(object_no+1) - - object = root_folder.linear[object_no] - object_count = object_count + 1 - - if object.isBookmark: - if only_errors: - if hasattr(object, "error"): - delattr(object, "error") - else: - continue - - if checked.has_key(object.href): - log("Already checked %s" % object.href) - old_object = root_folder.linear[checked[object.href]] - for attr_name in ("last_visit", "last_modified", - "error", "no_error", "moved", "size", "md5", "real_title", - "last_tested", "test_time", "icon", "charset"): - if hasattr(old_object, attr_name): - setattr(object, attr_name, getattr(old_object, attr_name)) - else: - log("Checking %s" % object.href) - rcode = robot.check_url(object) - - if rcode: - checked[object.href] = object_no - urls_no = urls_no + 1 - try: - size = size + int(object.size) - except (AttributeError, TypeError, ValueError): - pass # Some object does not have a size :( - else: - log("Interrupted by user (^C)") - break - robot.stop() - - if show_pbar: - del pbar - if report_stats: - print "Ok" - print object_count, "objects passed" - print urls_no, "URLs checked" - print size, "bytes eaten" - break_tree(root_folder.linear) - storage.store(root_folder) +__author__ = "Oleg Broytman " +__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design" +__license__ = "GNU GPL" - if rcode: - log("check_urls finished ok") - log.close() - try: - os.unlink(tempfname) - except os.error: - pass +def run(): + from getopt import getopt + optlist, args = getopt(sys.argv[1:], "ise") + + show_pbar = 1 + report_stats = 1 + only_errors = 0 + + for _opt, _arg in optlist: + if _opt == '-i': + show_pbar = 0 + if _opt == '-s': + report_stats = 0 + if _opt == '-e': + only_errors = 1 + try: + del _opt, _arg + except NameError: + pass + + if report_stats: + print("Broytman check_urls, Copyright (C) 1997-2017 PhiloSoft Design") + + if args: + sys.stderr.write("check_urls: too many arguments\n") + sys.stderr.write("Usage: check_urls [-ise]\n") + sys.exit(1) + + if show_pbar: + show_pbar = sys.stderr.isatty() + + if show_pbar: + try: + from m_lib.pbar.tty_pbar import ttyProgressBar + except ImportError: + show_pbar = 0 + + from m_lib.flog import makelog, openlog + if only_errors: + log = openlog("check.log") + log("chk_urls restarted for errors") + if report_stats: + print("chk_urls restarted for errors") + else: + log = makelog("check.log") + log("check_urls started") + if report_stats: + print(" check_urls: normal start") + + from storage import storage + storage = storage() + + from robots import robot + robot = robot(log) + + if report_stats: + sys.stdout.write("Loading %s: " % storage.filename) + sys.stdout.flush() + + root_folder = storage.load() + from bkmk_objects import make_linear, break_tree + make_linear(root_folder) + objects = len(root_folder.linear) + + if report_stats: + print("Ok") + + if report_stats: + if only_errors: + s = "Rechecking errors: " + else: + s = "Checking: " + sys.stdout.write(s) + sys.stdout.flush() + + if show_pbar: + pbar = ttyProgressBar(0, objects) + + urls_no = 0 + object_count = 0 + size = 0 + + checked = {} + rcode = 1 + + for object_no in range(objects): + if show_pbar: + pbar.display(object_no+1) + + object = root_folder.linear[object_no] + object_count = object_count + 1 + + if object.isBookmark: + href = object.href + if (href.startswith('place:') # Firefox SmartBookmarks + or '%s' in href): # Bookmark with keyword + log("Skipped %s" % href) + continue + + if only_errors: + if hasattr(object, "error"): + delattr(object, "error") + else: + continue + + if checked.has_key(href): + log("Already checked %s" % href) + old_object = root_folder.linear[checked[href]] + for attr_name in ("last_visit", "last_modified", + "error", "no_error", "moved", "size", "md5", "real_title", + "last_tested", "test_time", "icon", "charset"): + if hasattr(old_object, attr_name): + setattr(object, attr_name, getattr(old_object, attr_name)) + else: + log("Checking %s" % href) + rcode = robot.check_url(object) + + if rcode: + checked[href] = object_no + urls_no = urls_no + 1 + try: + size = size + int(object.size) + except (AttributeError, TypeError, ValueError): + pass # Some object does not have a size :( + else: + log("Interrupted by user (^C)") + break + robot.stop() + + if show_pbar: + del pbar + + if report_stats: + print("Ok") + print(object_count, "objects passed") + print(urls_no, "URLs checked") + print(size, "bytes eaten") + + break_tree(root_folder.linear) + storage.store(root_folder) + + if rcode: + log("check_urls finished ok") + log.close() if __name__ == '__main__': - run() + run()