#! /usr/bin/env python
-"""
- Robot interface - check URLs from bookmarks database
+"""Robot interface - check URLs from bookmarks database
- Written by BroytMann, Mar 2000 - Aug 2002. Copyright (C) 2000-2002 PhiloSoft Design
+This file is a part of Bookmarks database and Internet robot.
"""
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__license__ = "GNU GPL"
-import sys, os, urllib, tempfile
-tempfname = "check_urls" + tempfile.gettempprefix() + "tmp"
+import sys, os
def run():
pass
if report_stats:
- print "BroytMann check_urls, Copyright (C) 1997-2002 PhiloSoft Design"
+ print "Broytman check_urls, Copyright (C) 1997-2010 PhiloSoft Design"
if args:
sys.stderr.write("check_urls: too many arguments\n")
storage = storage()
from robots import robot
- robot = robot(tempfname, log)
+ robot = robot(log)
if report_stats:
sys.stdout.write("Loading %s: " % storage.filename)
sys.stdout.flush()
root_folder = storage.load()
- from bkmk_objects import make_linear
+ from bkmk_objects import make_linear, break_tree
make_linear(root_folder)
objects = len(root_folder.linear)
object_count = object_count + 1
if object.isBookmark:
+ href = object.href
+ if (href.startswith('place:') # Firefox SmartBookmarks
+ or '%s' in href): # Bookmark with keyword
+ log("Skipped %s" % href)
+ continue
+
if only_errors:
if hasattr(object, "error"):
delattr(object, "error")
else:
continue
- if checked.has_key(object.href):
- log("Already checked %s" % object.href)
- old_object = root_folder.linear[checked[object.href]]
+ if checked.has_key(href):
+ log("Already checked %s" % href)
+ old_object = root_folder.linear[checked[href]]
for attr_name in ("last_visit", "last_modified",
"error", "no_error", "moved", "size", "md5", "real_title",
- "last_tested", "test_time"):
+ "last_tested", "test_time", "icon", "charset"):
if hasattr(old_object, attr_name):
setattr(object, attr_name, getattr(old_object, attr_name))
else:
- url_type, url_rest = urllib.splittype(object.href)
- log("Checking %s:%s" % (url_type, url_rest))
- rcode = robot.check_url(object, url_type, url_rest)
+ log("Checking %s" % href)
+ rcode = robot.check_url(object)
if rcode:
- checked[object.href] = object_no
+ checked[href] = object_no
urls_no = urls_no + 1
try:
size = size + int(object.size)
print urls_no, "URLs checked"
print size, "bytes eaten"
+ break_tree(root_folder.linear)
storage.store(root_folder)
if rcode:
log("check_urls finished ok")
log.close()
- try:
- os.unlink(tempfname)
- except os.error:
- pass
-
if __name__ == '__main__':
run()