]> git.phdru.name Git - bookmarks_db.git/blob - bkmk_objects.py
Fix(Robot): Stop splitting and un-splitting URLs
[bookmarks_db.git] / bkmk_objects.py
1 """Objects to represent bookmarks.html structure
2
3 This file is a part of Bookmarks database and Internet robot.
4
5 """
6
7 __author__ = "Oleg Broytman <phd@phdru.name>"
8 __copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
9 __license__ = "GNU GPL"
10
11 __all__ = ['Folder', 'Bookmark', 'Ruler', 'Walker', 'Writer', 'Robot',
12            'InverseLinker', 'Linear', 'make_linear', 'make_tree', 'break_tree',
13            'quote_title', 'unquote_title', 'parse_params', 'set_params',
14            ]
15
16
17 from urllib.parse import unquote
18 import os
19
20 BKMK_FORMAT = os.environ.get("BKMK_FORMAT", "MOZILLA")
21
22
class Folder(list):
    """A bookmarks folder: a list holding its child objects.

    The ``isFolder``/``isBookmark`` class flags are what ``walk_depth``
    inspects to decide which walker callback each child is reported to.
    """

    isFolder = 1
    isBookmark = 0

    def __init__(self, add_date=None, comment='', last_modified=None):
        """Create an empty folder and record its metadata attributes."""
        super(Folder, self).__init__()
        self.comment = comment
        self.add_date = add_date
        self.last_modified = last_modified

    def walk_depth(self, walker, level=0):
        """Traverse this folder depth-first, reporting nodes to *walker*.

        A folder that has a ``header`` attribute is treated as the root and
        announced with ``walker.root_folder``; it is never pruned.  Any
        other folder is first offered to ``walker.prune_folder``: a true
        result skips the folder entirely (no start/end callbacks, no
        descent), otherwise ``walker.start_folder`` is called.

        Children are then visited in list order: sub-folders recurse with
        ``level + 1``, bookmarks go to ``walker.bookmark`` and everything
        else to ``walker.ruler``, both with this folder's *level*.  Finally
        ``walker.end_folder`` is called for this folder.
        """
        if hasattr(self, "header"):  # root folder
            walker.root_folder(self)
        elif walker.prune_folder(self):
            # Pruned sub-tree: nothing below it is visited or reported.
            return
        else:
            walker.start_folder(self, level)

        for child in self:
            if child.isFolder:
                child.walk_depth(walker, level + 1)
            elif child.isBookmark:
                walker.bookmark(child, level)
            else:
                walker.ruler(child, level)

        walker.end_folder(self, level)
52
53
class Bookmark(object):
    """A single bookmark: a link plus its metadata.

    Every constructor argument except ``parser_charset`` is stored on the
    instance under the same name.  ``parser_charset`` is accepted but is
    not stored by this constructor.
    """

    isFolder = 0
    isBookmark = 1

    def __init__(self, href, add_date, last_visit=None, last_modified=None,
                 keyword=None, comment='', icon_href=None, icon=None,
                 charset=None, parser_charset=None):
        # Location and timestamps.
        self.href, self.add_date = href, add_date
        self.last_visit, self.last_modified = last_visit, last_modified
        # Descriptive data.
        self.keyword, self.comment = keyword, comment
        # Icon reference and icon data.
        self.icon_href, self.icon = icon_href, icon
        self.charset = charset
70
71
class Ruler(object):
    """A separator entry: neither a folder nor a bookmark."""

    # With both flags off, Folder.walk_depth reports instances through
    # the walker's ruler() callback.
    isBookmark = 0
    isFolder = 0
75
76
class Walker(object):
    """
       Interface class for objects passed to Folder.walk_depth.

       Deriving from it is optional - Python does not require interface
       classes - but it is convenient: every callback is predefined as
       a no-op, so a subclass only overrides the ones it cares about.
    """

    def root_folder(self, r):
        """Called for the root folder before its children are visited."""

    def start_folder(self, f, level):
        """Called on entering a non-root folder that was not pruned."""

    def end_folder(self, f, level):
        """Called after all children of a visited folder were processed."""

    def bookmark(self, b, level):
        """Called for every bookmark found in a visited folder."""

    def ruler(self, r, level):
        """Called for every child that is neither folder nor bookmark."""

    def prune_folder(self, folder):
        """Return a true value to skip *folder*; the default keeps it."""
        return 0
103
104
class Writer(Walker):
    """Walker base that remembers an output target and a folder to prune.

    ``outfile`` is only stored here (presumably for subclasses to write
    to - confirm against the concrete writers).  ``prune`` is compared
    with each folder's ``name`` to decide whether the folder is skipped.
    """

    def __init__(self, outfile, prune=None):
        self.outfile = outfile
        self.prune = prune

    def prune_folder(self, folder):
        """Return True when *folder*'s name equals the configured prune."""
        wanted = self.prune
        return wanted == folder.name
112
113
class Robot(object):
    """Base robot class: keeps the ``log`` object passed at construction."""

    def __init__(self, log):
        self.log = log

    def stop(self):
        """Cleanup hook; this base implementation has nothing to do."""
        return None
120
121
# Helper class to make inverse links (nodes linked to their parent)
class InverseLinker(Walker):
    """Walker that sets a ``parent`` attribute on every visited node.

    A stack of the currently open folders is kept in ``parent_stack``;
    its top element is the parent of whatever node is reported next.
    """

    def root_folder(self, r):
        # The root gets no parent of its own; it only seeds the stack.
        self.parent_stack = [r]

    def start_folder(self, f, level):
        open_folders = self.parent_stack
        f.parent = open_folders[-1]
        # The folder becomes the parent of everything reported until
        # its matching end_folder call.
        open_folders.append(f)

    def end_folder(self, f, level):
        # Leaving a folder: drop the innermost open folder.
        self.parent_stack.pop()

    def bookmark(self, b, level):
        b.parent = self.parent_stack[-1]

    def ruler(self, r, level):
        r.parent = self.parent_stack[-1]
140
141
# Helper class to make a linear representation of the tree
class Linear(Walker):
    """Walker that collects every visited node into one flat list.

    The list starts with the root folder, is stored on the root as
    ``root.linear`` and is shared with this walker as ``self.linear``.
    Nodes are appended in the order they are reported by the walk.
    """

    def root_folder(self, r):
        flat = [r]
        r.linear = flat
        self.linear = r.linear

    def add_object(self, object):
        """Append one node to the flat list."""
        self.linear.append(object)

    def start_folder(self, f, level):
        self.add_object(f)

    def bookmark(self, b, level):
        self.add_object(b)

    def ruler(self, r, level):
        self.add_object(r)
159
160
# Helper - make a linked linear representation of the tree,
# suitable to be stored in sequential storage.
def make_linear(root_folder):
    """Link every node to its parent and flatten the tree.

    Two passes are made over *root_folder*: an InverseLinker walk, then a
    Linear walk that leaves the flat list in ``root_folder.linear``.
    Nothing is returned.
    """
    for walker_class in (InverseLinker, Linear):
        root_folder.walk_depth(walker_class())
169
170
171 # Helper, opposite of make_linear -
172 # make a tree from the linked linear representation.
def make_tree(linear):
    """Rebuild the tree from a linked linear representation.

    The first element of *linear* is the root folder; it is removed from
    the list (the list is modified in place).  Every remaining element
    is appended to the object referenced by its ``parent`` attribute.

    Returns the root folder.
    """
    root_folder = linear.pop(0)

    for node in linear:
        node.parent.append(node)

    return root_folder
181
182
def break_tree(linear):
    """Remove the parent links from a linear representation.

    The first element of *linear* is deleted from the list (in place),
    then the ``parent`` attribute is removed from every remaining
    element.  Nothing is returned.
    """
    del linear[0]

    for node in linear:
        delattr(node, "parent")
188
189
def quote_title(title):
    """Quote *title* for writing.

    When BKMK_FORMAT is "MOZILLA" every apostrophe is replaced with the
    ``&#39;`` character reference; otherwise the title is returned
    unchanged.
    """
    if BKMK_FORMAT != "MOZILLA":
        return title
    return title.replace("'", "&#39;")
194
195
def unquote_title(title):
    """Decode HTML entities in *title*.

    Only active when BKMK_FORMAT is "MOZILLA"; otherwise the title is
    returned unchanged.  ``&amp;`` is first turned into ``&``, the result
    is passed through an HTML ``unescape`` function, and any ``&#39;``
    still left afterwards is replaced with an apostrophe.
    """
    if BKMK_FORMAT != "MOZILLA":
        return title

    # Use HTMLParser().unescape when the HTMLParser module is importable,
    # otherwise fall back to html.unescape.
    try:
        from HTMLParser import HTMLParser
    except ImportError:
        from html import unescape
    else:
        unescape = HTMLParser().unescape

    decoded = unescape(title.replace("&amp;", '&'))
    return decoded.replace("&#39;", "'")
208
209
def parse_params(param_str):
    """Parse a ``main:key=value:key=value`` parameter string.

    The string is split on ``:``.  The first field is the main parameter;
    each following field is split on its first ``=`` into a key and a
    URL-quoted value, which is unquoted before being stored.

    Returns a ``(main_param, params_dict)`` tuple.
    Raises ValueError if a field after the first one contains no ``=``.
    """
    fields = param_str.split(':')
    main_param = fields[0]
    pairs = (field.split('=', 1) for field in fields[1:])
    options = {key: unquote(value) for key, value in pairs}
    return main_param, options
218
219
def set_params(obj, params):
    """Set attributes on *obj* from *params*.

    *params* is either a mapping (anything with an ``items`` attribute)
    or an iterable of ``(name, value)`` pairs.  Nothing is returned.
    """
    pairs = params.items() if hasattr(params, "items") else params
    for name, value in pairs:
        setattr(obj, name, value)