git.phdru.name Git - bookmarks_db.git/blob - bkmk_objects.py
Fix(Robot): Stop splitting and un-splitting URLs
[bookmarks_db.git] / bkmk_objects.py
1 """Objects to represent bookmarks.html structure
2
3 This file is a part of Bookmarks database and Internet robot.
4
5 """
6
7 __author__ = "Oleg Broytman <phd@phdru.name>"
8 __copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
9 __license__ = "GNU GPL"
10
11 __all__ = ['Folder', 'Bookmark', 'Ruler', 'Walker', 'Writer', 'Robot',
12     'InverseLinker', 'Linear', 'make_linear', 'make_tree', 'break_tree',
13     'quote_title', 'unquote_title', 'parse_params', 'set_params',
14 ]
15
16
17 import os, urllib
18
# Format selector read from the BKMK_FORMAT environment variable; falls back
# to "MOZILLA" when the variable is not set.  quote_title() and
# unquote_title() only transform titles when the value equals "MOZILLA".
BKMK_FORMAT = os.environ.get("BKMK_FORMAT", "MOZILLA")
20
class Folder(list):
    """A folder node: a list whose items are the folder's children.

    Children are told apart by their ``isFolder`` / ``isBookmark`` flags;
    anything that is neither is reported to the walker as a ruler.
    """

    isFolder = 1
    isBookmark = 0

    def __init__(self, add_date=None, comment='', last_modified=None):
        """Create an empty folder and remember its metadata."""
        super(Folder, self).__init__()
        self.add_date = add_date
        self.last_modified = last_modified
        self.comment = comment

    def walk_depth(self, walker, level=0):
        """Traverse this folder depth-first, reporting nodes to `walker`.

        A folder that has a ``header`` attribute is treated as the root:
        it is announced with ``walker.root_folder`` and is never pruned.
        Any other folder is first offered to ``walker.prune_folder``; a
        true answer skips the folder and everything below it, otherwise
        ``walker.start_folder`` is called.  After the children have been
        visited ``walker.end_folder`` is called (for the root as well).
        Sub-folders are walked with ``level + 1``; bookmarks and rulers
        are reported with the level of the folder that contains them.
        """
        if hasattr(self, "header"):
            # Root folder: no pruning and no start_folder() call.
            walker.root_folder(self)
        else:
            if walker.prune_folder(self):
                return
            walker.start_folder(self, level)

        for child in self:
            if child.isFolder:
                child.walk_depth(walker, level + 1)
            elif child.isBookmark:
                walker.bookmark(child, level)
            else:
                walker.ruler(child, level)

        walker.end_folder(self, level)
50
51
class Bookmark(object):
    """A single bookmark: a normalized URL plus its metadata.

    NOTE(review): the constructor uses the Python 2 `urllib` helpers
    (`splittype`, `splithost`, `splituser`, `splitpasswd`, `splitport`,
    `quote`) and byte-string `decode`/`encode`.
    """

    isFolder = 0
    isBookmark = 1

    def __init__(self, href, add_date, last_visit=None, last_modified=None,
          keyword=None, comment='', icon_href=None, icon=None,
          charset=None, parser_charset=None):
        """Split `href` into its parts, re-assemble it and store the result.

        The re-assembled URL has the user name and password passed through
        `urllib.quote` and the host name IDNA-encoded.  `parser_charset`
        is only used to decode the host name (UTF-8 when it is empty);
        it is not stored on the instance.  All other arguments are stored
        unchanged as attributes of the same name.
        """
        # Take the URL apart: scheme, then host part and path, then the
        # credentials and the port that may be embedded in the host part.
        protocol, request = urllib.splittype(href)
        user, password, port = None, None, None
        host, path = urllib.splithost(request)
        if host:
            user, host = urllib.splituser(host)
            if user:
                user, password = urllib.splitpasswd(user)
            host, port = urllib.splitport(host)
            if port: port = int(port)

        # Put the URL back together.  The 'place' scheme is written as
        # "place:" without the "//" that every other scheme gets.
        # NOTE(review): if href has no scheme, splittype presumably returns
        # None for protocol and the concatenation below would raise
        # TypeError - confirm against callers.
        if protocol == 'place':
            href = protocol + ":"
        else:
            href = protocol + "://"
        if user:
            href += urllib.quote(user)
            if password:
                href += ':' + urllib.quote(password)
            href += '@'
        if host:
            # Decode the host with the parser's charset (UTF-8 by default)
            # and convert it to its IDNA form.
            href += host.decode(parser_charset or 'utf-8').encode('idna')
            if port:
                href += ':%d' % port
        if path:
            href += path

        self.href = href
        self.add_date = add_date
        self.last_visit = last_visit
        self.last_modified = last_modified
        self.keyword = keyword
        self.comment = comment
        self.icon_href = icon_href
        self.icon = icon
        self.charset = charset
94
95
class Ruler(object):
    """Node that is neither a folder nor a bookmark.

    Folder.walk_depth() reports such nodes through walker.ruler().
    """

    isFolder = isBookmark = 0
99
100
class Walker(object):
    """Interface class for objects passed to Folder.walk_depth().

    Deriving from this class is optional - Python does not require
    interface classes - but it is convenient: every callback is predefined
    as a no-op, so a subclass only overrides the ones it needs.
    """

    def root_folder(self, r):
        """Called for the root folder; does nothing by default."""

    def start_folder(self, f, level):
        """Called when a non-root folder is entered; does nothing by default."""

    def end_folder(self, f, level):
        """Called when a folder has been walked; does nothing by default."""

    def bookmark(self, b, level):
        """Called for every bookmark; does nothing by default."""

    def ruler(self, r, level):
        """Called for every ruler; does nothing by default."""

    def prune_folder(self, folder):
        """Return a true value to skip `folder`; the default never prunes."""
        return 0
127
128
class Writer(Walker):
    """Walker that remembers an output file and an optional folder to skip."""

    def __init__(self, outfile, prune=None):
        """Store the output file and the name of the folder to prune."""
        self.prune = prune
        self.outfile = outfile

    def prune_folder(self, folder):
        """Tell whether `folder` is the one selected for pruning.

        The folder is pruned when its ``name`` equals the ``prune`` value
        given to the constructor.
        """
        unwanted = self.prune
        return unwanted == folder.name
136
137
class Robot(object):
    """Base class for robots; it only keeps the `log` object it is given."""

    def __init__(self, log):
        """Remember `log` for later use."""
        self.log = log

    def stop(self):
        """Cleanup hook; there is nothing to do on cleanup here."""
        return None
144
145
# Helper class to make inverse links (nodes linked to their parent)
class InverseLinker(Walker):
    """Walker that gives every visited node a ``parent`` attribute.

    A stack of the folders currently being walked is kept; its top is the
    parent of whatever node is reported next.  The stack is created when
    the root folder is reported.
    """

    def root_folder(self, r):
        """Start the stack of parents with the root folder."""
        self.parent_stack = [r]

    def start_folder(self, f, level):
        """Link the folder to its parent, then make it the current parent."""
        stack = self.parent_stack
        f.parent = stack[-1]
        stack.append(f)

    def end_folder(self, f, level):
        """Leave the folder: drop the top of the stack of parents."""
        self.parent_stack.pop()

    def bookmark(self, b, level):
        """Link the bookmark to the folder that is being walked."""
        b.parent = self.parent_stack[-1]

    def ruler(self, r, level):
        """Link the ruler to the folder that is being walked."""
        r.parent = self.parent_stack[-1]
163
164
# Helper class to make linear representation of the tree
class Linear(Walker):
    """Walker that collects every visited node into one flat list.

    The list starts with the root folder and is stored both on the root
    folder (``r.linear``) and on the walker (``self.linear``).
    """

    def root_folder(self, r):
        """Create the flat list, seeded with the root folder itself."""
        r.linear = [r]
        # Share the very same list object with the root folder.
        self.linear = r.linear

    def add_object(self, object):
        """Append a node to the flat list."""
        self.linear.append(object)

    def start_folder(self, f, level):
        """Record a folder when it is entered."""
        self.add_object(f)

    def bookmark(self, b, level):
        """Record a bookmark."""
        self.add_object(b)

    def ruler(self, r, level):
        """Record a ruler."""
        self.add_object(r)
182
183
# Helper - make linked linear representation of the tree, suitable to be stored in sequential storage
def make_linear(root_folder):
    """Link every node to its parent, then flatten the tree.

    The tree is walked twice: first with an InverseLinker, which sets the
    ``parent`` attributes, then with a Linear walker, which stores the
    flat list on a root folder (one with a ``header`` attribute) as
    ``root_folder.linear``.  Nothing is returned.
    """
    for walker_class in (InverseLinker, Linear):
        root_folder.walk_depth(walker_class())
191
192
193 # Helper, opposite of make_linear - make a tree from the linked linear representation
def make_tree(linear):
    """Rebuild the tree from a linked linear list and return its root.

    The first item is taken as the root folder and removed from `linear`
    (the list is modified in place).  Every remaining item is appended to
    the object referenced by its ``parent`` attribute.
    """
    root = linear[0]
    del linear[0]  # the root has no parent to be attached to

    for node in linear:
        owner = node.parent
        owner.append(node)

    return root
202
def break_tree(linear):
    """Drop the first item of `linear` and unlink the rest from their parents.

    The list is modified in place: its first item is removed, then the
    ``parent`` attribute is deleted from every remaining item.
    """
    del linear[0]

    for node in linear:
        delattr(node, "parent")
208
209
def quote_title(title):
    """Escape apostrophes in `title` when the format is "MOZILLA".

    With any other BKMK_FORMAT the title is returned unchanged.
    """
    if BKMK_FORMAT != "MOZILLA":
        return title
    return title.replace("'", "&#39;")
214
def unquote_title(title):
    """Undo HTML escaping in `title` when the format is "MOZILLA".

    "&amp;" is turned back into "&" first, then the UTF-8 text is decoded,
    run through HTMLParser.unescape(), encoded back to UTF-8, and any
    remaining "&#39;" is replaced with an apostrophe.  With any other
    BKMK_FORMAT the title is returned unchanged.
    """
    if BKMK_FORMAT != "MOZILLA":
        return title

    from HTMLParser import HTMLParser
    unescape = HTMLParser().unescape
    decoded = title.replace("&amp;", '&').decode('utf-8')
    unescaped = unescape(decoded)
    return unescaped.encode('utf-8').replace("&#39;", "'")
221
222
def parse_params(param_str):
    """Split "main:key=value:key=value..." into its components.

    Returns a tuple ``(main_param, param_list)``: the text before the
    first colon, and a dictionary built from the remaining colon-separated
    ``key=value`` items.  Each item is split on its first "=" only, so a
    value may itself contain "=".  An item without "=" raises ValueError.
    """
    pieces = param_str.split(':')
    main_param, extras = pieces[0], pieces[1:]
    param_list = dict(item.split('=', 1) for item in extras)
    return main_param, param_list
231
def set_params(obj, params):
    """Set attributes on `obj` from `params`.

    `params` is either an object with an ``items()`` method (a dictionary,
    for example) or an iterable of ``(key, value)`` pairs.
    """
    pairs = params.items() if hasattr(params, "items") else params
    for name, val in pairs:
        setattr(obj, name, val)
236         setattr(obj, key, value)