TODO: Configuration file

[bookmarks_db.git] / check_urls.py
diff --git a/check_urls.py b/check_urls.py
index 497c081c950ae03ebd133cc1fcf6a6436a2424a0..e2dac0ff3c516a65ed9f23b22d37ce050bbe6030 100755
--- a/check_urls.py
+++ b/check_urls.py
@@ -1,13 +1,14 @@
-#! /usr/local/bin/python -O
-"""
-   Robot interface - check URLs from bookmarks database
+#! /usr/bin/env python
+"""Robot interface - check URLs from bookmarks database
  
-   Written by BroytMann, Mar 2000 - Aug 2002. Copyright (C) 2000-2002 PhiloSoft Design
+This file is a part of Bookmarks database and Internet robot.
  """
  
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__license__ = "GNU GPL"
  
-import sys, os, urllib, tempfile
-tempfname = "check_urls" + tempfile.gettempprefix() + "tmp"
+import sys, os
  
  
  def run():
@@ -31,7 +32,7 @@ def run():
        pass
  
     if report_stats:
-      print "BroytMann check_urls, Copyright (C) 1997-2002 PhiloSoft Design"
+      print "Broytman check_urls, Copyright (C) 1997-2010 PhiloSoft Design"
  
     if args:
        sys.stderr.write("check_urls: too many arguments\n")
@@ -63,14 +64,14 @@ def run():
     storage = storage()
  
     from robots import robot
-   robot = robot(tempfname, log)
+   robot = robot(log)
  
     if report_stats:
        sys.stdout.write("Loading %s: " % storage.filename)
        sys.stdout.flush()
  
     root_folder = storage.load()
-   from bkmk_objects import make_linear
+   from bkmk_objects import make_linear, break_tree
     make_linear(root_folder)
     objects = len(root_folder.linear)
  
@@ -103,27 +104,32 @@ def run():
        object_count = object_count + 1
  
        if object.isBookmark:
+         href = object.href
+         if (href.startswith('place:') # Firefox SmartBookmarks
+               or '%s' in href): # Bookmark with keyword
+            log("Skipped %s" % href)
+            continue
+
           if only_errors:
              if hasattr(object, "error"):
                 delattr(object, "error")
              else:
                 continue
  
-         if checked.has_key(object.href):
-            log("Already checked %s" % object.href)
-            old_object = root_folder.linear[checked[object.href]]
+         if checked.has_key(href):
+            log("Already checked %s" % href)
+            old_object = root_folder.linear[checked[href]]
              for attr_name in ("last_visit", "last_modified",
                    "error", "no_error", "moved", "size", "md5", "real_title",
-                  "last_tested", "test_time"):
+                  "last_tested", "test_time", "icon", "charset"):
                 if hasattr(old_object, attr_name):
                    setattr(object, attr_name, getattr(old_object, attr_name))
           else:
-            url_type, url_rest = urllib.splittype(object.href)
-            log("Checking %s:%s" % (url_type, url_rest))
-            rcode = robot.check_url(object, url_type, url_rest)
+            log("Checking %s" % href)
+            rcode = robot.check_url(object)
  
              if rcode:
-               checked[object.href] = object_no
+               checked[href] = object_no
                 urls_no = urls_no + 1
                 try:
                    size = size + int(object.size)
@@ -143,17 +149,13 @@ def run():
        print urls_no, "URLs checked"
        print size, "bytes eaten"
  
+   break_tree(root_folder.linear)
     storage.store(root_folder)
  
     if rcode:
        log("check_urls finished ok")
     log.close()
  
-   try:
-      os.unlink(tempfname)
-   except os.error:
-      pass
-
  
  if __name__ == '__main__':
     run()