git.phdru.name Git - bookmarks_db.git/blobdiff - check_urls.py
Fix(Robot): Stop splitting and un-splitting URLs
[bookmarks_db.git] / check_urls.py
index 305458028b38fb51caee8b747bf60041943584a3..ecf8ad62c2042ed4412aa4aa40fcedf028f89d12 100755 (executable)
-#! /usr/bin/env python
-"""Robot interface - check URLs from bookmarks database
+#! /usr/bin/env python3
+"""Robot interface - check URLs from the command line
 
 This file is a part of Bookmarks database and Internet robot.
 """
 
-__version__ = "$Revision$"[11:-2]
-__revision__ = "$Id$"[5:-2]
-__date__ = "$Date$"[7:-2]
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2011 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2024 PhiloSoft Design"
 __license__ = "GNU GPL"
 
-import sys, os
+import sys
+
+from bkmk_objects import Bookmark
+from Writers.bkmk_wflad import strftime
+
+try:
+    import httplib
+except ImportError:
+    pass
+else:
+    HTTP = httplib.HTTP
+
+    class MyHTTP(HTTP):
+        def _setup(self, conn):
+            HTTP._setup(self, conn)
+            self.set_debuglevel(1)
+
+    httplib.HTTP = MyHTTP
 
 
 def run():
-   from getopt import getopt
-   optlist, args = getopt(sys.argv[1:], "ise")
-
-   show_pbar = 1
-   report_stats = 1
-   only_errors = 0
-
-   for _opt, _arg in optlist:
-      if _opt == '-i':
-         show_pbar = 0
-      if _opt == '-s':
-         report_stats = 0
-      if _opt == '-e':
-         only_errors = 1
-   try:
-      del _opt, _arg
-   except NameError:
-      pass
-
-   if report_stats:
-      print "Broytman check_urls, Copyright (C) 1997-2010 PhiloSoft Design"
-
-   if args:
-      sys.stderr.write("check_urls: too many arguments\n")
-      sys.stderr.write("Usage: check_urls [-ise]\n")
-      sys.exit(1)
-
-   if show_pbar:
-      show_pbar = sys.stderr.isatty()
-
-   if show_pbar:
-      try:
-         from m_lib.pbar.tty_pbar import ttyProgressBar
-      except ImportError:
-         show_pbar = 0
-
-   from m_lib.flog import makelog, openlog
-   if only_errors:
-      log = openlog("check.log")
-      log("chk_urls restarted for errors")
-      if report_stats:
-         print "chk_urls restarted for errors"
-   else:
-      log = makelog("check.log")
-      log("check_urls started")
-      if report_stats:
-         print "   check_urls: normal start"
-
-   from storage import storage
-   storage = storage()
-
-   from robots import robot
-   robot = robot(log)
-
-   if report_stats:
-      sys.stdout.write("Loading %s: " % storage.filename)
-      sys.stdout.flush()
-
-   root_folder = storage.load()
-   from bkmk_objects import make_linear, break_tree
-   make_linear(root_folder)
-   objects = len(root_folder.linear)
-
-   if report_stats:
-      print "Ok"
-
-   if report_stats:
-      if only_errors:
-         s = "Rechecking errors: "
-      else:
-         s = "Checking: "
-      sys.stdout.write(s)
-      sys.stdout.flush()
-
-   if show_pbar:
-      pbar = ttyProgressBar(0, objects)
-
-   urls_no = 0
-   object_count = 0
-   size = 0
-
-   checked = {}
-   rcode = 1
-
-   for object_no in range(objects):
-      if show_pbar:
-         pbar.display(object_no+1)
-
-      object = root_folder.linear[object_no]
-      object_count = object_count + 1
-
-      if object.isBookmark:
-         if object.href.startswith('place:'): # Firefox SmartBookmarks
-            log("Skipped %s" % object.href)
-            continue
-
-         if only_errors:
-            if hasattr(object, "error"):
-               delattr(object, "error")
-            else:
-               continue
-
-         if checked.has_key(object.href):
-            log("Already checked %s" % object.href)
-            old_object = root_folder.linear[checked[object.href]]
-            for attr_name in ("last_visit", "last_modified",
-                  "error", "no_error", "moved", "size", "md5", "real_title",
-                  "last_tested", "test_time", "icon", "charset"):
-               if hasattr(old_object, attr_name):
-                  setattr(object, attr_name, getattr(old_object, attr_name))
-         else:
-            log("Checking %s" % object.href)
-            rcode = robot.check_url(object)
-
-            if rcode:
-               checked[object.href] = object_no
-               urls_no = urls_no + 1
-               try:
-                  size = size + int(object.size)
-               except (AttributeError, TypeError, ValueError):
-                  pass # Some object does not have a size :(
-            else:
-               log("Interrupted by user (^C)")
-               break
-   robot.stop()
-
-   if show_pbar:
-      del pbar
-
-   if report_stats:
-      print "Ok"
-      print object_count, "objects passed"
-      print urls_no, "URLs checked"
-      print size, "bytes eaten"
-
-   break_tree(root_folder.linear)
-   storage.store(root_folder)
-
-   if rcode:
-      log("check_urls finished ok")
-   log.close()
+    print("Broytman check_urls, Copyright (C) 2010-2024 PhiloSoft Design")
+
+    if len(sys.argv) < 2:
+        sys.stderr.write("Usage: check_urls.py url1 [url2...]\n")
+        sys.exit(1)
+
+    from m_lib.flog import makelog
+    log = makelog("check.log")
+    log.outfile.reconfigure(encoding='utf-8')
+    sys.stdout.reconfigure(encoding='utf-8')
+
+    from robots import robot
+    robot = robot(log)
+
+    for url in sys.argv[1:]:
+        bookmark = Bookmark(href=url, add_date=None)
+        bookmark.parent = None
+
+        rcode = robot.check_url(bookmark)
+        print("check_urls: %s" % rcode)
+
+        if hasattr(bookmark, 'error'):
+            print(bookmark.error)
+
+        else:
+            print("""\
+  Title: %s
+  URL: %s
+  LastModified: %s
+  Moved: %s
+  Size: %s
+  Md5: %s
+  IconURI: %s
+  Icon: %s
+  Charset: %s
+  """ % (
+              getattr(bookmark, 'real_title', None)
+              or getattr(bookmark, 'title', None),
+              bookmark.href,
+              strftime(bookmark.last_modified),
+              getattr(bookmark, 'moved', None),
+              getattr(bookmark, 'size', None),
+              getattr(bookmark, 'md5', None),
+              bookmark.icon_href, bookmark.icon, bookmark.charset,
+              )
+            )
+
+    robot.stop()
+    log.close()
 
 
 if __name__ == '__main__':
-   run()
+    run()