git.phdru.name Git - bookmarks_db.git/blobdiff - check_title.py
Fix(Robot): Stop splitting and un-splitting URLs
[bookmarks_db.git] / check_title.py
index 554c2af2e3ce5179423fbd6ff1349f7cb92ef96e..0bb10602281defb9bd127ae91f9fdf8d8ab019a0 100755 (executable)
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
 """Check and show URLs in the bookmarks database where name != real title
 
 This file is a part of Bookmarks database and Internet robot.
@@ -14,15 +14,30 @@ __copyright__ = "Copyright (C) 2002-2023 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 
+def report_title(href, title, real_title):
+    """Write href, title and real title (one per line) to the log or stdout.
+
+    A real_title of None is reported as 'NO REAL TITLE!!!'."""
+    if real_title is None:
+        real_title = "NO REAL TITLE!!!"
+    s = "%s\n%s\n%s\n" % (href, title, real_title)
+    # run() assigns the log_file global only for -l, so it may not exist yet.
+    print(s, file=globals().get("log_file") or None)
+
+
 def run():
     from getopt import getopt
-    optlist, args = getopt(sys.argv[1:], "s")
+    optlist, args = getopt(sys.argv[1:], "sl:")
 
     report_stats = 1
+    global log_file
+    log_filename = None
 
     for _opt, _arg in optlist:
         if _opt == '-s':
             report_stats = 0
+        if _opt == '-l':
+            log_filename = _arg
     try:
         del _opt, _arg
     except NameError:
@@ -33,9 +48,12 @@ def run():
 
     if args:
         sys.stderr.write("check_title: too many arguments\n")
-        sys.stderr.write("Usage: check_title [-s]\n")
+        sys.stderr.write("Usage: check_title [-s] [-l logfile]\n")
         sys.exit(1)
 
+    if log_filename:
+        log_file = open(log_filename, 'wt', encoding='utf-8')
+
     from storage import storage
     storage = storage()
 
@@ -50,6 +68,7 @@ def run():
     if report_stats:
         print("Ok")
 
+    changed_titles = no_titles = 0
     for object_no in range(objects):
         object = root_folder.linear[object_no]
 
@@ -63,18 +82,16 @@ def run():
                 unquoted_title = unquote_title(quote_title(object.real_title))
                 unquoted_name = unquote_title(object.name)
                 if unquoted_name != unquoted_title:
-                    print(object.href)
-                    print(unquoted_name)
-                    print(unquoted_title)
-                    print()
+                    report_title(object.href, unquoted_name, unquoted_title)
+                    changed_titles += 1
             else:
-                print(object.href)
-                print(object.name)
-                print("NO REAL TITLE!!!")
-                print()
+                report_title(object.href, object.name, None)
+                no_titles += 1
 
     if report_stats:
         print(objects, "objects passed")
+        print(changed_titles, "changed titles")
+        print(no_titles, "no titles")
 
 
 if __name__ == '__main__':