git.phdru.name Git - bookmarks_db.git/commitdiff
Feat: Report redirects and set URLs
author Oleg Broytman <phd@phdru.name>
Tue, 14 Nov 2023 15:11:12 +0000 (18:11 +0300)
committer Oleg Broytman <phd@phdru.name>
Tue, 14 Nov 2023 17:12:15 +0000 (20:12 +0300)
Run through the bookmarks database and set URLs from redirects
from an external file.

bkmk-chk
check_redirects.py [new file with mode: 0755]
doc/ANNOUNCE
doc/ChangeLog
set-URLs [new file with mode: 0755]
set-URLs.py [new file with mode: 0755]

diff --git a/bkmk-chk b/bkmk-chk
index 3e5e5fe98789aecfcd8b7f59f06c8a5ce018dd23..6a748e9e8ffaa67e1954f2f85c79299fe6063206 100755 (executable)
--- a/bkmk-chk
+++ b/bkmk-chk
@@ -7,7 +7,7 @@
 # This file is a part of Bookmarks database and Internet robot.
 #
 # __author__ = "Oleg Broytman <phd@phdru.name>"
-# __copyright__ = "Copyright (C) 2000-2014 PhiloSoft Design"
+# __copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
 # __license__ = "GNU GPL"
 
 . "`dirname \"$0\"`"/set-path
@@ -24,6 +24,7 @@ check_urls.py -e &&
 BKMK_WRITER=flad db2bkmk.py &&
 check_dups.py -s -l bookmarks.err >/dev/null &&
 check_title.py > check_title.txt &&
+check_redirects.py > check_redirects.txt &&
 bkmk-sort &&
 
 # Write results to the bookmarks files
diff --git a/check_redirects.py b/check_redirects.py
new file mode 100755 (executable)
index 0000000..12c74d8
--- /dev/null
+++ b/check_redirects.py
@@ -0,0 +1,72 @@
+#! /usr/bin/env python3
+"""Check and show URLs in the bookmarks database that have redirects
+
+This file is a part of Bookmarks database and Internet robot.
+"""
+
+from __future__ import print_function
+import sys
+from bkmk_objects import make_linear
+
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2023 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+
+def run():  # Print each bookmark URL that has a recorded redirect
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "s")  # -s is the only option
+
+    report_stats = 1  # banner and progress messages are on by default
+
+    for _opt, _arg in optlist:
+        if _opt == '-s':
+            report_stats = 0  # -s turns the banner and statistics off
+    try:
+        del _opt, _arg  # discard the loop variables
+    except NameError:  # never bound when no options were given
+        pass
+
+    if report_stats:
+        print("Broytman check_redirects, Copyright (C) 2023 PhiloSoft Design")
+
+    if args:  # positional arguments are not accepted
+        sys.stderr.write("check_redirects: too many arguments\n")
+        sys.stderr.write("Usage: check_redirects [-s]\n")
+        sys.exit(1)
+
+    from storage import storage
+    storage = storage()  # rebind the name to the object the call returns
+
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()  # show the message before the load starts
+
+    root_folder = storage.load()
+    make_linear(root_folder)  # presumably builds root_folder.linear - TODO confirm
+    objects = len(root_folder.linear)
+
+    if report_stats:
+        print("Ok")
+
+    for object_no in range(objects):
+        object = root_folder.linear[object_no]
+
+        if object.isBookmark:  # non-bookmark objects are ignored
+            if hasattr(object, "error") or \
+                    object.href.startswith('place:'):  # Firefox SmartBookmarks
+                continue  # skip entries with an error or a place: href
+
+            if hasattr(object, "moved"):
+                print(object.href)  # current URL
+                print(object.moved)  # value of the "moved" attribute
+                print()  # empty line closes the three-line record
+                del object.moved  # NOTE(review): nothing is stored afterwards
+
+    if report_stats:
+        print(objects, "objects passed")
+
+
+if __name__ == '__main__':  # true only when the file is executed as a script
+    run()
diff --git a/doc/ANNOUNCE b/doc/ANNOUNCE
index a047cd32096d115ab9b59934208836a0d1716ea2..b6712fa284a20a8980659b7feee406036b0544ee 100644 (file)
--- a/doc/ANNOUNCE
+++ b/doc/ANNOUNCE
@@ -10,6 +10,8 @@ Version 5.0.0 (2023-??-??)
 
    Python 3.
 
+   Report redirects and set URLs.
+
    Remove BeautifulSoup.py (use globally installed).
 
 
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 2bb4915cc25e8bd4007230053164881e937134ee..0952548b7e1b56176ef94e3ddcb3ddade8e3e050 100644 (file)
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -2,6 +2,8 @@ Version 5.0.0 (2023-??-??)
 
    Python 3.
 
+   Report redirects and set URLs.
+
    Remove BeautifulSoup.py (use globally installed).
 
 Version 4.6.0 (2014-07-06)
diff --git a/set-URLs b/set-URLs
new file mode 100755 (executable)
index 0000000..d408e16
--- /dev/null
+++ b/set-URLs
@@ -0,0 +1,16 @@
+#! /bin/sh
+# Set URLs from redirects
+#
+# This file is a part of Bookmarks database and Internet robot.
+#
+# __author__ = "Oleg Broytman <phd@phdru.name>"
+# __copyright__ = "Copyright (C) 2023 PhiloSoft Design"
+# __license__ = "GNU GPL"
+
+. "`dirname \"$0\"`"/set-path  # source set-path from this script's directory
+
+set-URLs.py "$@" || exit 1  # pass all arguments through; stop on failure
+
+BKMK_WRITER=flad db2bkmk.py &&  # each later step runs only if this one succeeds
+convert_st.py json &&
+exec db2bkmk.py  # exec replaces the shell, so this exit status is the script's
diff --git a/set-URLs.py b/set-URLs.py
new file mode 100755 (executable)
index 0000000..bfcf5da
--- /dev/null
+++ b/set-URLs.py
@@ -0,0 +1,125 @@
+#! /usr/bin/env python3
+"""Run through the bookmarks database and set URLs from redirects
+from an external file
+
+This file is a part of Bookmarks database and Internet robot.
+"""
+
+from __future__ import print_function
+import sys
+
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2023 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+
+def run():  # Replace bookmark URLs using a URL => redirect mapping from a file
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "s")  # -s is the only option
+
+    report_stats = 1  # banner and progress messages are on by default
+
+    for _opt, _arg in optlist:
+        if _opt == '-s':
+            report_stats = 0  # -s turns the banner and statistics off
+    try:
+        del _opt, _arg  # discard the loop variables
+    except NameError:  # never bound when no options were given
+        pass
+
+    if report_stats:
+        print("Broytman set-URLs, Copyright (C) 2023 PhiloSoft Design")
+
+    if len(args) != 1:  # exactly one positional argument: the URLs file
+        sys.stderr.write("Usage: set-URLs [-s] urls_file\n")
+        sys.exit(1)
+
+    # Read the external file with URLs and build a mapping (URL => redirect)
+    urls_dict = {}
+
+    URL = None  # first line of the record being read
+    redirect = None  # second line of the record being read
+
+    urls_file = open(args[0], 'rt')  # records are: URL, redirect, empty line
+    for line in urls_file:
+        line = line.rstrip('\n')  # strip the trailing newline, if there is one
+        if URL is None:
+            URL = line
+
+        elif redirect is None:
+            redirect = line
+
+        elif line:  # the third line in every 3 lines must be empty
+            raise ValueError(
+                "line is not empty for URL `%s', redirect `%s': line `%s'"
+                % (URL, redirect, line)
+            )
+
+        else:  # We've got 3 lines - add new entry to the mapping
+            if not redirect.startswith('https://') \
+                    and not redirect.startswith('http://'):
+                raise ValueError("Redirect is not an URL: `%s'" % redirect)
+            if URL in urls_dict:  # a repeated URL must map to the same redirect
+                if redirect != urls_dict[URL]:
+                    raise ValueError(
+                        "Redirects are not identical for URL `%s':"
+                        " `%s' != `%s'" % (URL, redirect, urls_dict[URL])
+                    )
+
+            else:
+                urls_dict[URL] = redirect
+
+            # reset
+            URL = None
+            redirect = None
+
+    urls_file.close()  # NOTE(review): a last record without its empty line is dropped
+
+    from storage import storage
+    storage = storage()  # rebind the name to the object the call returns
+
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()  # show the message before the load starts
+
+    root_folder = storage.load()
+    from bkmk_objects import make_linear, break_tree
+    make_linear(root_folder)  # presumably builds root_folder.linear - TODO confirm
+    objects = len(root_folder.linear)
+
+    if report_stats:
+        print("Ok")
+
+    # Run through the list of objects and check URLs/redirects
+    changed = 0  # number of bookmarks whose href was replaced
+    for object_no in range(objects):
+        object = root_folder.linear[object_no]
+
+        if object.isBookmark:  # non-bookmark objects are ignored
+            URL = object.href
+            if URL in urls_dict:
+                redirect = urls_dict[URL]
+                object.href = redirect  # exact-match URLs get the redirect target
+                changed += 1
+
+    if changed and report_stats:
+        sys.stdout.write("Saving %s: " % storage.filename)
+        sys.stdout.flush()
+
+    if not changed and report_stats:
+        sys.stdout.write("No need to save data\n")
+        sys.stdout.flush()
+
+    if changed:  # the storage is rewritten only when something was replaced
+        break_tree(root_folder.linear)  # NOTE(review): purpose not visible here
+        storage.store(root_folder)
+
+    if changed and report_stats:  # counters are printed only after a save
+        print("Ok")
+        print(objects, "objects passed")
+        print(changed, "objects changed")
+
+
+if __name__ == '__main__':  # true only when the file is executed as a script
+    run()