X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=check_urls.py;h=f75cb16cc20f47ae769ee6775763e27a1293e9a9;hb=4fd6d1f55e5e5dbcc0bdc1b933d6e14076316437;hp=75f31559b89ab3b12216d0e93b7b3eadf8535e0f;hpb=9edef5a570ea28f7c06d8d92fdd70afe0ea86809;p=bookmarks_db.git diff --git a/check_urls.py b/check_urls.py index 75f3155..f75cb16 100755 --- a/check_urls.py +++ b/check_urls.py @@ -2,12 +2,11 @@ """ Robot interface - check URLs from bookmarks database - Written by BroytMann, Mar 2000 - Aug 2002. Copyright (C) 2000-2002 PhiloSoft Design + Written by Broytman. Copyright (C) 2000-2010 PhiloSoft Design. """ -import sys, os, urllib, tempfile -tempfname = "check_urls" + tempfile.gettempprefix() + "tmp" +import sys, os def run(): @@ -31,7 +30,7 @@ def run(): pass if report_stats: - print "BroytMann check_urls, Copyright (C) 1997-2002 PhiloSoft Design" + print "Broytman check_urls, Copyright (C) 1997-2010 PhiloSoft Design" if args: sys.stderr.write("check_urls: too many arguments\n") @@ -63,14 +62,14 @@ def run(): storage = storage() from robots import robot - robot = robot(tempfname, log) + robot = robot(log) if report_stats: sys.stdout.write("Loading %s: " % storage.filename) sys.stdout.flush() root_folder = storage.load() - from bkmk_objects import make_linear + from bkmk_objects import make_linear, break_tree make_linear(root_folder) objects = len(root_folder.linear) @@ -103,6 +102,10 @@ def run(): object_count = object_count + 1 if object.isBookmark: + if object.href.startswith('place:'): # Firefox SmartBookmarks + log("Skipped %s" % object.href) + continue + if only_errors: if hasattr(object, "error"): delattr(object, "error") @@ -114,13 +117,12 @@ def run(): old_object = root_folder.linear[checked[object.href]] for attr_name in ("last_visit", "last_modified", "error", "no_error", "moved", "size", "md5", "real_title", - "last_tested", "test_time"): + "last_tested", "test_time", "icon", "charset"): if hasattr(old_object, attr_name): setattr(object, attr_name, getattr(old_object, attr_name)) else: - url_type, url_rest = urllib.splittype(object.href) - log("Checking %s:%s" % (url_type, url_rest)) - rcode = robot.check_url(object, url_type, url_rest) + log("Checking %s" % object.href) + rcode = robot.check_url(object) if rcode: checked[object.href] = object_no @@ -143,17 +145,13 @@ def run(): print urls_no, "URLs checked" print size, "bytes eaten" + break_tree(root_folder.linear) storage.store(root_folder) if rcode: log("check_urls finished ok") log.close() - try: - os.unlink(tempfname) - except os.error: - pass - if __name__ == '__main__': run()