git.phdru.name Git - bookmarks_db.git/blobdiff - check_urls.py
Fix(Robot): Stop splitting and un-splitting URLs
[bookmarks_db.git] / check_urls.py
index ae7fe830475ac067a187a243124fe7065d2de213..ecf8ad62c2042ed4412aa4aa40fcedf028f89d12 100755 (executable)
-#! /usr/bin/env python
-"""Robot interface - check URLs from bookmarks database
+#! /usr/bin/env python3
+"""Robot interface - check URLs from the command line
 
 This file is a part of Bookmarks database and Internet robot.
 """
 
-from __future__ import print_function
-import sys, os
-
-
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2024 PhiloSoft Design"
 __license__ = "GNU GPL"
 
+import sys
+
+from bkmk_objects import Bookmark
+from Writers.bkmk_wflad import strftime
+
+try:
+    import httplib
+except ImportError:
+    pass
+else:
+    HTTP = httplib.HTTP
+
+    class MyHTTP(HTTP):
+        def _setup(self, conn):
+            HTTP._setup(self, conn)
+            self.set_debuglevel(1)
+
+    httplib.HTTP = MyHTTP
+
 
 def run():
-    from getopt import getopt
-    optlist, args = getopt(sys.argv[1:], "ise")
-
-    show_pbar = 1
-    report_stats = 1
-    only_errors = 0
-
-    for _opt, _arg in optlist:
-        if _opt == '-i':
-            show_pbar = 0
-        if _opt == '-s':
-            report_stats = 0
-        if _opt == '-e':
-            only_errors = 1
-    try:
-        del _opt, _arg
-    except NameError:
-        pass
-
-    if report_stats:
-        print("Broytman check_urls, Copyright (C) 1997-2023 PhiloSoft Design")
-
-    if args:
-        sys.stderr.write("check_urls: too many arguments\n")
-        sys.stderr.write("Usage: check_urls [-ise]\n")
+    print("Broytman check_urls, Copyright (C) 2010-2024 PhiloSoft Design")
+
+    if len(sys.argv) < 2:
+        sys.stderr.write("Usage: check_urls.py url1 [url2...]\n")
         sys.exit(1)
 
-    if show_pbar:
-        show_pbar = sys.stderr.isatty()
-
-    if show_pbar:
-        try:
-            from m_lib.pbar.tty_pbar import ttyProgressBar
-        except ImportError:
-            show_pbar = 0
-
-    from m_lib.flog import makelog, openlog
-    if only_errors:
-        log = openlog("check.log")
-        log("chk_urls restarted for errors")
-        if report_stats:
-            print("chk_urls restarted for errors")
-    else:
-        log = makelog("check.log")
-        log("check_urls started")
-        if report_stats:
-            print("   check_urls: normal start")
-
-    from storage import storage
-    storage = storage()
+    from m_lib.flog import makelog
+    log = makelog("check.log")
+    log.outfile.reconfigure(encoding='utf-8')
+    sys.stdout.reconfigure(encoding='utf-8')
 
     from robots import robot
     robot = robot(log)
 
-    if report_stats:
-        sys.stdout.write("Loading %s: " % storage.filename)
-        sys.stdout.flush()
+    for url in sys.argv[1:]:
+        bookmark = Bookmark(href=url, add_date=None)
+        bookmark.parent = None
 
-    root_folder = storage.load()
-    from bkmk_objects import make_linear, break_tree
-    make_linear(root_folder)
-    objects = len(root_folder.linear)
+        rcode = robot.check_url(bookmark)
+        print("check_urls: %s" % rcode)
 
-    if report_stats:
-        print("Ok")
+        if hasattr(bookmark, 'error'):
+            print(bookmark.error)
 
-    if report_stats:
-        if only_errors:
-            s = "Rechecking errors: "
         else:
-            s = "Checking: "
-        sys.stdout.write(s)
-        sys.stdout.flush()
-
-    if show_pbar:
-        pbar = ttyProgressBar(0, objects)
-
-    urls_no = 0
-    object_count = 0
-    size = 0
-
-    checked = {}
-    rcode = 1
-
-    for object_no in range(objects):
-        if show_pbar:
-            pbar.display(object_no+1)
-
-        object = root_folder.linear[object_no]
-        object_count = object_count + 1
-
-        if object.isBookmark:
-            href = object.href
-            if (href.startswith('place:') # Firefox SmartBookmarks
-                    or '%s' in href): # Bookmark with keyword
-                log("Skipped %s" % href)
-                continue
-
-            if only_errors:
-                if hasattr(object, "error"):
-                    delattr(object, "error")
-                else:
-                    continue
-
-            if checked.has_key(href):
-                log("Already checked %s" % href)
-                old_object = root_folder.linear[checked[href]]
-                for attr_name in ("last_visit", "last_modified",
-                      "error", "no_error", "moved", "size", "md5", "real_title",
-                      "last_tested", "test_time", "icon", "charset"):
-                    if hasattr(old_object, attr_name):
-                        setattr(object, attr_name, getattr(old_object, attr_name))
-            else:
-                log("Checking %s" % href)
-                rcode = robot.check_url(object)
-
-                if rcode:
-                    checked[href] = object_no
-                    urls_no = urls_no + 1
-                    try:
-                        size = size + int(object.size)
-                    except (AttributeError, TypeError, ValueError):
-                        pass # Some object does not have a size :(
-                else:
-                    log("Interrupted by user (^C)")
-                    break
-    robot.stop()
-
-    if show_pbar:
-        del pbar
+            print("""\
+  Title: %s
+  URL: %s
+  LastModified: %s
+  Moved: %s
+  Size: %s
+  Md5: %s
+  IconURI: %s
+  Icon: %s
+  Charset: %s
+  """ % (
+              getattr(bookmark, 'real_title', None)
+              or getattr(bookmark, 'title', None),
+              bookmark.href,
+              strftime(bookmark.last_modified),
+              getattr(bookmark, 'moved', None),
+              getattr(bookmark, 'size', None),
+              getattr(bookmark, 'md5', None),
+              bookmark.icon_href, bookmark.icon, bookmark.charset,
+              )
+            )
 
-    if report_stats:
-        print("Ok")
-        print(object_count, "objects passed")
-        print(urls_no, "URLs checked")
-        print(size, "bytes eaten")
-
-    break_tree(root_folder.linear)
-    storage.store(root_folder)
-
-    if rcode:
-        log("check_urls finished ok")
+    robot.stop()
     log.close()