git.phdru.name Git - bookmarks_db.git/blob - bkmk_objects.py
Split hrefs into domain and path components; recode only domain.
[bookmarks_db.git] / bkmk_objects.py
1 """Objects to represent bookmarks.html structure
2
3 This file is a part of Bookmarks database and Internet robot.
4 """
5
6 __version__ = "$Revision$"[11:-2]
7 __revision__ = "$Id$"[5:-2]
8 __date__ = "$Date$"[7:-2]
9 __author__ = "Oleg Broytman <phd@phdru.name>"
10 __copyright__ = "Copyright (C) 2000-2011 PhiloSoft Design"
11 __license__ = "GNU GPL"
12
13 __all__ = ['Folder', 'Bookmark', 'Ruler', 'Walker', 'Writer', 'Robot',
14     'InverseLinker', 'Linear', 'make_linear', 'make_tree', 'break_tree',
15     'quote_title', 'unquote_title',
16 ]
17
18
19 import os, urllib
20
# Bookmarks file flavour, taken from the BKMK_FORMAT environment variable;
# "MOZILLA" is used when the variable is not set.
BKMK_FORMAT = os.getenv("BKMK_FORMAT", "MOZILLA")
22
class Folder(list):
   """A bookmarks folder; its list items are the folder's children.

   A folder that carries a ``header`` attribute (not set in this class)
   is treated as the root folder by walk_depth.
   """
   isFolder = 1
   isBookmark = 0

   def __init__(self, add_date=None, comment='', last_modified=None):
      """Create an empty folder and remember its metadata."""
      super(Folder, self).__init__()
      self.add_date = add_date
      self.last_modified = last_modified
      self.comment = comment

   def walk_depth(self, walker, level=0):
      """Traverse the tree depth-first, reporting every node to walker.

      The root folder is announced with walker.root_folder(); any other
      folder is first offered to walker.prune_folder() and, when that
      returns a true value, skipped entirely (neither start_folder nor
      end_folder is called for it). Children are reported with the level
      of the folder that contains them; sub-folders are walked with
      level+1.
      """
      if hasattr(self, "header"):
         # Root folder: never pruned, no start_folder call
         walker.root_folder(self)
      elif walker.prune_folder(self):
         return
      else:
         walker.start_folder(self, level)

      for child in self:
         if child.isFolder:
            child.walk_depth(walker, level + 1)
         elif child.isBookmark:
            walker.bookmark(child, level)
         else:
            walker.ruler(child, level)

      walker.end_folder(self, level)
52
53
54 class Bookmark:
55    isFolder = 0
56    isBookmark = 1
57
58    def __init__(self, href, add_date, last_visit=None, last_modified=None,
59          keyword=None, comment='', icon_href=None, icon=None,
60          charset=None, parser_charset=None):
61       protocol, request = urllib.splittype(href)
62       user, password, port = None, None, None
63       host, path = urllib.splithost(request)
64       if host:
65          user, host = urllib.splituser(host)
66          if user:
67             user, password = urllib.splitpasswd(user)
68          host, port = urllib.splitport(host)
69          if port: port = int(port)
70       path, tag = urllib.splittag(path)
71       path, query = urllib.splitquery(path)
72       path = urllib.unquote(path)
73       if tag: tag = urllib.unquote_plus(tag)
74
75       if host: # host can be None for Mozilla's place: URLs
76           host = host.decode(parser_charset or 'utf-8').encode('idna')
77
78       href = protocol + "://"
79       if user:
80          href += urllib.quote(user)
81          if password:
82             href += ':' + urllib.quote(password)
83          href += '@'
84       if host:
85          href += host
86          if port:
87             href += ':%d' % port
88       if path:
89          if protocol == "file":
90             href += urllib.quote(path)
91          else:
92             href += urllib.quote(path)
93       if query:
94          href += '?' + query
95       if tag:
96          href += '#' + urllib.quote_plus(tag)
97
98       self.href = href
99       self.add_date = add_date
100       self.last_visit = last_visit
101       self.last_modified = last_modified
102       self.keyword = keyword
103       self.comment = comment
104       self.icon_href = icon_href
105       self.icon = icon
106       self.charset = charset
107
108
class Ruler:
   """A node that is neither a folder nor a bookmark.

   Folder.walk_depth reports such nodes through walker.ruler().
   """
   isFolder = isBookmark = 0
112
113
class Walker:
   """
      Base class for objects passed to Folder.walk_depth. Deriving from it
      is optional - Python needs no interface classes - but it provides
      do-nothing defaults for every callback, so a subclass only has to
      define the ones it cares about.
   """

   def root_folder(self, r):
      """Called once for the root folder; no-op by default."""

   def start_folder(self, f, level):
      """Called when a non-root, non-pruned folder is entered; no-op."""

   def end_folder(self, f, level):
      """Called after all children of a folder were visited; no-op."""

   def bookmark(self, b, level):
      """Called for every bookmark; no-op by default."""

   def ruler(self, r, level):
      """Called for every node that is neither folder nor bookmark; no-op."""

   def prune_folder(self, folder):
      """Return a true value to make walk_depth skip folder; default is 0."""
      return 0
140
141
class Writer(Walker):
   """Walker that keeps an output file and an optional folder name to prune."""

   def __init__(self, outfile, prune=None):
      """Remember the output file and the value to compare folder names with."""
      self.prune = prune
      self.outfile = outfile

   def prune_folder(self, folder):
      """Return true when folder.name equals the prune value."""
      unwanted = self.prune
      return unwanted == folder.name
149
150
class Robot:
   """Base class for robots; it only keeps the log object it is given."""

   def __init__(self, log):
      self.log = log

   def stop(self):
      """Cleanup hook; the base class has nothing to clean up."""
      return None
157
158
# Helper class to make inverse links (nodes linked to their parent)
class InverseLinker(Walker):
   def root_folder(self, r):
      # The root gets no parent attribute; it is the bottom of the stack
      self.parent_stack = [r]

   def _link(self, node):
      # The innermost folder being walked is on top of the stack
      node.parent = self.parent_stack[-1]

   def start_folder(self, f, level):
      self._link(f)
      self.parent_stack.append(f) # f becomes the parent of what follows

   def end_folder(self, f, level):
      self.parent_stack.pop()     # Back to the enclosing folder

   def bookmark(self, b, level):
      self._link(b)

   def ruler(self, r, level):
      self._link(r)
176
177
# Helper class to make linear representation of the tree
class Linear(Walker):
   def root_folder(self, r):
      # One list shared by the walker and the root folder; the root
      # folder itself is its first element
      self.linear = r.linear = [r]

   def add_object(self, object):
      self.linear.append(object)

   # Every node visited after the root is appended in visiting order

   def start_folder(self, f, level):
      self.add_object(f)

   def bookmark(self, b, level):
      self.add_object(b)

   def ruler(self, r, level):
      self.add_object(r)
195
196
# Helper - make linked linear representation of the tree, suitable to be stored in sequential storage
def make_linear(root_folder):
   # First pass links every node to its parent, second pass collects the
   # nodes into root_folder.linear; nothing is returned.
   for walker_class in (InverseLinker, Linear):
      root_folder.walk_depth(walker_class())
204
205
# Helper, opposite of make_linear - make a tree from the linked linear representation
def make_tree(linear):
   # The first element is the root folder; it is removed from the list
   # (the caller's list is modified in place).
   root_folder = linear.pop(0)

   # Every remaining node is appended to the folder its parent attribute
   # points to.
   for node in linear:
      node.parent.append(node)

   return root_folder
215
def break_tree(linear):
   """Drop the first element of linear and remove the parent attribute
   from every remaining element. The list is modified in place."""
   linear.pop(0)

   for node in linear:
      delattr(node, "parent")
221
222
def quote_title(title):
   """Return title with apostrophes replaced by &#39; when BKMK_FORMAT
   is "MOZILLA"; otherwise return it unchanged."""
   if BKMK_FORMAT != "MOZILLA":
      return title
   return title.replace("'", "&#39;")
227
def unquote_title(title):
   """Return title with HTML entities decoded when BKMK_FORMAT is
   "MOZILLA"; otherwise return it unchanged."""
   if BKMK_FORMAT != "MOZILLA":
      return title

   from HTMLParser import HTMLParser
   # &amp; is turned into & before unescaping, so an entity written as
   # &amp;name; is decoded as well
   ampersands_restored = title.replace("&amp;", '&')
   unescaped = HTMLParser().unescape(ampersands_restored)
   return unescaped.replace("&#39;", "'")