git.phdru.name Git - bookmarks_db.git/blob - check_dups.py
Style: Fix flake8 E303 too many blank lines
[bookmarks_db.git] / check_dups.py
1 #! /usr/bin/env python
2 """Check duplicate URLs in the bookmarks database
3
4 This file is a part of Bookmarks database and Internet robot.
5 """
6
7 from __future__ import print_function
8 import sys
9
10
11 __author__ = "Oleg Broytman <phd@phdru.name>"
12 __copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
13 __license__ = "GNU GPL"
14
15
# File object that report_dup() writes duplicate reports to; while it is
# None (or otherwise falsy) reports are printed to stdout instead.
# run() rebinds it when a log file is requested with the -l option.
log_file = None
17
18
def report_dup(href, object_no):
    """Report one duplicate URL.

    href is the duplicated URL and object_no is the record number quoted in
    the message as the place where the URL was first seen.  The line goes to
    the module-level log_file when one is set, otherwise to stdout.
    """
    message = "Duplicate URL: %s (first at rec. %d)" % (href, object_no)

    # No log file configured: fall back to standard output.
    if not log_file:
        print(message)
        return

    log_file.write("%s\n" % message)
26
27
def run():
    """Scan the bookmarks database and report every duplicated URL.

    Command-line options (taken from sys.argv):
      -s          suppress the banner and the progress/statistics output
      -l logfile  open logfile for writing and bind it to the module-level
                  log_file, so duplicate reports go there instead of stdout

    Exits with status 1 if any positional arguments are given.
    """
    from getopt import getopt
    optlist, args = getopt(sys.argv[1:], "sl:")

    global log_file
    report_stats = True
    log_filename = None

    for opt, arg in optlist:
        if opt == '-s':
            report_stats = False
        elif opt == '-l':
            log_filename = arg

    if report_stats:
        print("Broytman check_dups, Copyright (C) 2000-2023 PhiloSoft Design")

    if args:
        sys.stderr.write("check_dups: too many arguments\n")
        sys.stderr.write("Usage: check_dups [-s] [-l logfile]\n")
        sys.exit(1)

    if log_filename:
        log_file = open(log_filename, 'w')

    # From here on the log file may be open; make sure it is closed even if
    # loading or scanning the database raises.
    try:
        from storage import storage
        bookmarks_storage = storage()

        if report_stats:
            sys.stdout.write("Loading %s: " % bookmarks_storage.filename)
            sys.stdout.flush()

        root_folder = bookmarks_storage.load()
        from bkmk_objects import make_linear
        make_linear(root_folder)
        objects = len(root_folder.linear)

        if report_stats:
            print("Ok")

        # Maps each URL to the record number at which it was first seen.
        first_seen = {}

        for object_no, item in enumerate(root_folder.linear):
            if item.isBookmark:
                href = item.href
                # "in" works on Python 2 and 3; dict.has_key() is gone in 3.
                if href in first_seen:
                    report_dup(href, first_seen[href])
                else:
                    first_seen[href] = object_no
    finally:
        if log_filename:
            log_file.close()

    if report_stats:
        print("Ok")
        print(objects, "objects passed")
89         print(objects, "objects passed")
90
91
# Run the duplicate check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    run()