]> git.phdru.name Git - bookmarks_db.git/blobdiff - set-urls.py
Rename set-URLs -> set-urls
[bookmarks_db.git] / set-urls.py
diff --git a/set-urls.py b/set-urls.py
new file mode 100755 (executable)
index 0000000..b897078
--- /dev/null
@@ -0,0 +1,146 @@
+#! /usr/bin/env python3
+"""Run through the bookmarks database and set URLs from redirects
+from an external file
+
+This file is a part of Bookmarks database and Internet robot.
+"""
+
+from __future__ import print_function
+import sys
+
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2023, 2024 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+
+def _add_redirect(urls_dict, URL, redirect):
+    """Validate one (URL, redirect) pair and record it in urls_dict
+
+    Raise ValueError if the redirect is not an http(s) URL or if the URL
+    is already mapped to a different redirect.
+    """
+    if not redirect.startswith(('https://', 'http://')):
+        raise ValueError("Redirect is not an URL: `%s'" % redirect)
+
+    # setdefault() stores the redirect for a new URL and returns
+    # the earlier one for a repeated URL; repeats must agree.
+    known = urls_dict.setdefault(URL, redirect)
+    if redirect != known:
+        raise ValueError(
+            "Redirects are not identical for URL `%s':"
+            " `%s' != `%s'" % (URL, redirect, known)
+        )
+
+
+def _read_redirects(filename):
+    """Read the file with URLs and build a mapping (URL => redirect)
+
+    The file consists of 3-line records: URL, redirect, empty line.
+    Raise ValueError on a non-empty third line or an invalid
+    or conflicting redirect.
+    """
+    urls_dict = {}
+
+    URL = None
+    redirect = None
+
+    # `with` closes the file even when a ValueError aborts parsing
+    with open(filename, 'rt') as urls_file:
+        for line in urls_file:
+            # Strip only a real newline: the last line of the file
+            # may lack one and must not lose its final character.
+            line = line.rstrip('\n')
+            if URL is None:
+                URL = line
+
+            elif redirect is None:
+                redirect = line
+
+            elif line:  # the third line in every 3 lines must be empty
+                raise ValueError(
+                    "line is not empty for URL `%s', redirect `%s': line `%s'"
+                    % (URL, redirect, line)
+                )
+
+            else:  # We've got 3 lines - add new entry to the mapping
+                _add_redirect(urls_dict, URL, redirect)
+                URL = None
+                redirect = None
+
+    # The last record may end at EOF without the empty separator line;
+    # don't lose it. A lone URL without a redirect is still ignored.
+    if redirect is not None:
+        _add_redirect(urls_dict, URL, redirect)
+
+    return urls_dict
+
+
+def run():
+    """Set bookmarks' URLs to redirects listed in an external file
+
+    Usage: set-urls [-s] urls_file
+
+    Option -s turns off the banner and progress/statistics output.
+    Exit with status 1 unless exactly one file argument is given;
+    raise ValueError if the file is malformed. The storage is saved
+    only if at least one bookmark was changed.
+    """
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "s")
+
+    report_stats = not any(opt == '-s' for opt, _arg in optlist)
+
+    if report_stats:
+        print("Broytman set-urls, Copyright (C) 2023, 2024 PhiloSoft Design")
+
+    if len(args) != 1:
+        sys.stderr.write("Usage: set-urls [-s] urls_file\n")
+        sys.exit(1)
+
+    urls_dict = _read_redirects(args[0])
+
+    # Project modules are imported only after the input file is parsed
+    from storage import storage as make_storage
+    storage = make_storage()
+
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()
+
+    root_folder = storage.load()
+    from bkmk_objects import make_linear, break_tree
+    make_linear(root_folder)
+    objects = len(root_folder.linear)
+
+    if report_stats:
+        print("Ok")
+
+    # Run through the list of objects and replace URLs with redirects
+    changed = 0
+    for obj in root_folder.linear:
+        if obj.isBookmark and obj.href in urls_dict:
+            obj.href = urls_dict[obj.href]
+            changed += 1
+
+    if not changed:
+        if report_stats:
+            sys.stdout.write("No need to save data\n")
+            sys.stdout.flush()
+        return
+
+    if report_stats:
+        sys.stdout.write("Saving %s: " % storage.filename)
+        sys.stdout.flush()
+
+    break_tree(root_folder.linear)
+    storage.store(root_folder)
+
+    if report_stats:
+        print("Ok")
+        print(objects, "objects passed")
+        print(changed, "objects changed")
+
+
+if __name__ == '__main__':
+    run()