git.phdru.name Git - bookmarks_db.git/blobdiff - check_title.py
Fix(Robot): Stop splitting and un-splitting URLs
[bookmarks_db.git] / check_title.py
index 950c6e276f8141cf251cc2960e6bf5a14f5eefe0..0bb10602281defb9bd127ae91f9fdf8d8ab019a0 100755 (executable)
@@ -1,75 +1,98 @@
-#! /usr/local/bin/python -O
-"""
-   Check and show URLs in the bookmarks database where name <> real title
+#! /usr/bin/env python3
+"""Check and show URLs in the bookmarks database where name != real title
 
-   Written by BroytMann, Jul 2002 - Aug 2002. Copyright (C) 2002 PhiloSoft Design
+This file is a part of Bookmarks database and Internet robot.
 """
 
-
+from __future__ import print_function
 import sys
+from bkmk_objects import make_linear, quote_title, unquote_title
 
 
-def run():
-   from getopt import getopt
-   optlist, args = getopt(sys.argv[1:], "s")
-
-   report_stats = 1
-
-   for _opt, _arg in optlist:
-      if _opt == '-s':
-         report_stats = 0
-   try:
-      del _opt, _arg
-   except NameError:
-      pass
-
-   if report_stats:
-      print "BroytMann check_title, Copyright (C) 2002 PhiloSoft Design"
-
-   if args:
-      sys.stderr.write("check_title: too many arguments\n")
-      sys.stderr.write("Usage: check_title [-s]\n")
-      sys.exit(1)
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2002-2023 PhiloSoft Design"
+__license__ = "GNU GPL"
 
-   from storage import storage
-   storage = storage()
 
-   if report_stats:
-      sys.stdout.write("Loading %s: " % storage.filename)
-      sys.stdout.flush()
+# Destination for reports; run() rebinds it to an open file when -l is given.
+# Defined at import time so report_title() never hits an undefined global.
+log_file = None
+
+
+def report_title(href, title, real_title):
+    """Emit one report record: the URL, the stored name and the real title.
+
+    A ``real_title`` of None is reported as "NO REAL TITLE!!!".  The record
+    goes to the module-level ``log_file`` when one is set, else to stdout;
+    either way it is followed by an empty separator line.
+    """
+    if real_title is None:
+        real_title = "NO REAL TITLE!!!"
+    record = "%s\n%s\n%s\n" % (href, title, real_title)
 
-   root_folder = storage.load()
-   from bkmk_objects import make_linear
-   make_linear(root_folder)
-   objects = len(root_folder.linear)
+    if log_file is not None:
+        log_file.write("%s\n" % record)
+    else:
+        print(record)
 
-   if report_stats:
-      print "Ok"
 
-
-   for object_no in range(objects):
-      object = root_folder.linear[object_no]
-
-      if object.isBookmark:
-         if hasattr(object, "moved") or hasattr(object, "error"):
-            continue
-
-         if hasattr(object, "real_title"):
-            if object.name <> object.real_title:
-               print object.href
-               print object.name
-               print object.real_title
-               print
-         else:
-            print object.href
-            print object.name
-            print "NO REAL TITLE!!!"
-            print
-
-
-   if report_stats:
-      print objects, "objects passed"
+def run():
+    """Command-line entry point: report bookmarks whose name != real title.
+
+    Options (parsed from ``sys.argv``):
+      -s          suppress the banner, progress and summary output
+      -l logfile  write reports to ``logfile`` (UTF-8) instead of stdout
+
+    Exits with status 1 on an invalid option or on unexpected positional
+    arguments.
+    """
+    from getopt import GetoptError, getopt
+
+    global log_file
+    # Give the global a defined value even without -l, so report_title()
+    # can test it instead of raising NameError.
+    log_file = None
+
+    try:
+        optlist, args = getopt(sys.argv[1:], "sl:")
+    except GetoptError as exc:
+        sys.stderr.write("check_title: %s\n" % exc)
+        sys.stderr.write("Usage: check_title [-s] [-l logfile]\n")
+        sys.exit(1)
+
+    report_stats = True
+    log_filename = None
+    for opt, arg in optlist:
+        if opt == '-s':
+            report_stats = False
+        elif opt == '-l':
+            log_filename = arg
+
+    if report_stats:
+        print("Broytman check_title, Copyright (C) 2002-2023 PhiloSoft Design")
+
+    if args:
+        sys.stderr.write("check_title: too many arguments\n")
+        sys.stderr.write("Usage: check_title [-s] [-l logfile]\n")
+        sys.exit(1)
+
+    if log_filename:
+        log_file = open(log_filename, 'wt', encoding='utf-8')
+
+    try:
+        # Imported lazily so option errors are reported without needing
+        # the storage backend.
+        from storage import storage
+        store = storage()
+
+        if report_stats:
+            sys.stdout.write("Loading %s: " % store.filename)
+            sys.stdout.flush()
+
+        root_folder = store.load()
+        make_linear(root_folder)
+        objects = len(root_folder.linear)
+
+        if report_stats:
+            print("Ok")
+
+        changed_titles = no_titles = 0
+        for bookmark in root_folder.linear:
+            if not bookmark.isBookmark:
+                continue
+
+            if hasattr(bookmark, "moved") or hasattr(bookmark, "error") or \
+                    bookmark.href.startswith('place:'):  # Firefox SmartBookmarks
+                continue
+
+            # A missing attribute and an explicit None both mean "no title".
+            real_title = getattr(bookmark, "real_title", None)
+            if real_title is None:
+                report_title(bookmark.href, bookmark.name, None)
+                no_titles += 1
+                continue
+
+            # NOTE(review): the quote/unquote round trip presumably puts the
+            # real title into the same normalized form as the stored name;
+            # confirm against bkmk_objects.
+            unquoted_title = unquote_title(quote_title(real_title))
+            unquoted_name = unquote_title(bookmark.name)
+            if unquoted_name != unquoted_title:
+                report_title(bookmark.href, unquoted_name, unquoted_title)
+                changed_titles += 1
+    finally:
+        # Close the log explicitly so buffered reports are flushed even if
+        # loading or checking fails part-way.
+        if log_file is not None:
+            log_file.close()
+            log_file = None
+
+    if report_stats:
+        print(objects, "objects passed")
+        print(changed_titles, "changed titles")
+        print(no_titles, "no titles")
 
 
+# Run the check only when executed as a script, not when imported.
 if __name__ == '__main__':
-   run()
+    run()