git.phdru.name Git - bookmarks_db.git/blobdiff - check_urls.py
TODO: Configuration file
[bookmarks_db.git] / check_urls.py
index f75cb16cc20f47ae769ee6775763e27a1293e9a9..e2dac0ff3c516a65ed9f23b22d37ce050bbe6030 100755 (executable)
@@ -1,10 +1,12 @@
 #! /usr/bin/env python
-"""
-   Robot interface - check URLs from bookmarks database
+"""Robot interface - check URLs from bookmarks database
 
-   Written by Broytman. Copyright (C) 2000-2010 PhiloSoft Design.
+This file is a part of Bookmarks database and Internet robot.
 """
 
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__license__ = "GNU GPL"
 
 import sys, os
 
@@ -102,8 +104,10 @@ def run():
       object_count = object_count + 1
 
       if object.isBookmark:
-         if object.href.startswith('place:'): # Firefox SmartBookmarks
-            log("Skipped %s" % object.href)
+         href = object.href
+         if (href.startswith('place:') # Firefox SmartBookmarks
+               or '%s' in href): # Bookmark with keyword
+            log("Skipped %s" % href)
             continue
 
          if only_errors:
@@ -112,20 +116,20 @@ def run():
             else:
                continue
 
-         if checked.has_key(object.href):
-            log("Already checked %s" % object.href)
-            old_object = root_folder.linear[checked[object.href]]
+         if checked.has_key(href):
+            log("Already checked %s" % href)
+            old_object = root_folder.linear[checked[href]]
             for attr_name in ("last_visit", "last_modified",
                   "error", "no_error", "moved", "size", "md5", "real_title",
                   "last_tested", "test_time", "icon", "charset"):
                if hasattr(old_object, attr_name):
                   setattr(object, attr_name, getattr(old_object, attr_name))
          else:
-            log("Checking %s" % object.href)
+            log("Checking %s" % href)
             rcode = robot.check_url(object)
 
             if rcode:
-               checked[object.href] = object_no
+               checked[href] = object_no
                urls_no = urls_no + 1
                try:
                   size = size + int(object.size)