git.phdru.name Git - bookmarks_db.git/blobdiff - check_urls.py
Renamed parse_html modules to bkmk_ph_* to avoid name clashes.
[bookmarks_db.git] / check_urls.py
index 4081637952acef7cf072efbe7923c7e8f7f3e4db..305458028b38fb51caee8b747bf60041943584a3 100755 (executable)
@@ -1,13 +1,17 @@
 #! /usr/bin/env python
-"""
-   Robot interface - check URLs from bookmarks database
+"""Robot interface - check URLs from bookmarks database
 
-   Written by BroytMann. Copyright (C) 2000-2007 PhiloSoft Design.
+This file is a part of Bookmarks database and Internet robot.
 """
 
+__version__ = "$Revision$"[11:-2]
+__revision__ = "$Id$"[5:-2]
+__date__ = "$Date$"[7:-2]
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2000-2011 PhiloSoft Design"
+__license__ = "GNU GPL"
 
-import sys, os, tempfile
-tempfname = "check_urls" + tempfile.gettempprefix() + "tmp"
+import sys, os
 
 
 def run():
@@ -31,7 +35,7 @@ def run():
       pass
 
    if report_stats:
-      print "BroytMann check_urls, Copyright (C) 1997-2007 PhiloSoft Design"
+      print "Broytman check_urls, Copyright (C) 1997-2010 PhiloSoft Design"
 
    if args:
       sys.stderr.write("check_urls: too many arguments\n")
@@ -63,14 +67,14 @@ def run():
    storage = storage()
 
    from robots import robot
-   robot = robot(tempfname, log)
+   robot = robot(log)
 
    if report_stats:
       sys.stdout.write("Loading %s: " % storage.filename)
       sys.stdout.flush()
 
    root_folder = storage.load()
-   from bkmk_objects import make_linear
+   from bkmk_objects import make_linear, break_tree
    make_linear(root_folder)
    objects = len(root_folder.linear)
 
@@ -103,6 +107,10 @@ def run():
       object_count = object_count + 1
 
       if object.isBookmark:
+         if object.href.startswith('place:'): # Firefox SmartBookmarks
+            log("Skipped %s" % object.href)
+            continue
+
          if only_errors:
             if hasattr(object, "error"):
                delattr(object, "error")
@@ -114,7 +122,7 @@ def run():
             old_object = root_folder.linear[checked[object.href]]
             for attr_name in ("last_visit", "last_modified",
                   "error", "no_error", "moved", "size", "md5", "real_title",
-                  "last_tested", "test_time"):
+                  "last_tested", "test_time", "icon", "charset"):
                if hasattr(old_object, attr_name):
                   setattr(object, attr_name, getattr(old_object, attr_name))
          else:
@@ -142,17 +150,13 @@ def run():
       print urls_no, "URLs checked"
       print size, "bytes eaten"
 
+   break_tree(root_folder.linear)
    storage.store(root_folder)
 
    if rcode:
       log("check_urls finished ok")
    log.close()
 
-   try:
-      os.unlink(tempfname)
-   except os.error:
-      pass
-
 
 if __name__ == '__main__':
    run()