]> git.phdru.name Git - bookmarks_db.git/blob - bkmk_objects.py
Convert to unicode and back again to unescape unichr'd entities.
[bookmarks_db.git] / bkmk_objects.py
1 """Objects to represent bookmarks.html structure
2
3 This file is a part of Bookmarks database and Internet robot.
4 """
5
# Version-control keyword strings with the leading "$Keyword: " and the
# trailing " $" sliced off.  While a keyword is unexpanded (as written here)
# the slice yields an empty string.
# NOTE(review): presumably the keywords are expanded by the VCS on checkout.
__version__ = "$Revision$"[11:-2]
__revision__ = "$Id$"[5:-2]
__date__ = "$Date$"[7:-2]
__author__ = "Oleg Broytman <phd@phdru.name>"
__copyright__ = "Copyright (C) 2000-2011 PhiloSoft Design"
__license__ = "GNU GPL"

# Names exported by "from <this module> import *".
__all__ = ['Folder', 'Bookmark', 'Ruler', 'Walker', 'Writer', 'Robot',
    'InverseLinker', 'Linear', 'make_linear', 'make_tree', 'break_tree',
    'quote_title', 'unquote_title',
]
17
18
19 import os, urllib
20
# Bookmarks file format, read from the BKMK_FORMAT environment variable
# ("MOZILLA" when the variable is not set).  quote_title() and
# unquote_title() only modify titles when the value is "MOZILLA".
BKMK_FORMAT = os.environ.get("BKMK_FORMAT", "MOZILLA")
22
class Folder(list):
   """A folder of the bookmarks tree.

   The folder is a list; its items are sub-folders, bookmarks and rulers,
   told apart by their isFolder / isBookmark flags.
   """
   isFolder = 1
   isBookmark = 0

   def __init__(self, add_date=None, comment='', last_modified=None):
      """Create an empty folder and remember its dates and comment."""
      super(Folder, self).__init__()
      self.add_date = add_date
      self.last_modified = last_modified
      self.comment = comment

   def walk_depth(self, walker, level=0):
      """Walk the subtree depth-first, reporting every node to walker.

      The folder that has a "header" attribute is the root: it is announced
      with walker.root_folder() and is never pruned.  Any other folder is
      skipped entirely (no start_folder/end_folder, no items) when
      walker.prune_folder() returns a true value.

      Sub-folders are walked at level+1; bookmarks and rulers are reported
      at the level of the folder that contains them.
      """
      if hasattr(self, "header"):
         walker.root_folder(self)
      elif walker.prune_folder(self):
         return
      else:
         walker.start_folder(self, level)

      for item in self:
         if item.isFolder:
            item.walk_depth(walker, level + 1)
         elif item.isBookmark:
            walker.bookmark(item, level)
         else:
            walker.ruler(item, level)

      walker.end_folder(self, level)
52
53
class Bookmark:
   """A bookmark: a leaf of the bookmarks tree that points to a URL.

   On construction the URL is taken apart and rebuilt in a normalised form
   (see _normalize_href); all other arguments are stored as given.

   This is Python 2 code: it relies on urllib.splittype() and friends and
   on decoding a byte-string host name.
   """
   isFolder = 0
   isBookmark = 1

   def __init__(self, href, add_date, last_visit=None, last_modified=None,
         keyword=None, comment='', icon_href=None, icon=None,
         charset=None, parser_charset=None):
      """Store the bookmark's attributes; href is normalised first.

      parser_charset is the charset used to decode the host name found in
      href (utf-8 when not given).  It is used only while normalising href
      and is not stored on the instance.
      """
      self.href = self._normalize_href(href, parser_charset)
      self.add_date = add_date
      self.last_visit = last_visit
      self.last_modified = last_modified
      self.keyword = keyword
      self.comment = comment
      self.icon_href = icon_href
      self.icon = icon
      self.charset = charset

   def _normalize_href(self, href, parser_charset=None):
      """Return href with quoted credentials and an IDNA-encoded host name.

      URLs that have no "//authority" part ("mailto:...", "javascript:...",
      "place:...") keep their single ":" separator, and an href without a
      scheme is returned unchanged.
      """
      protocol, request = urllib.splittype(href)
      if protocol is None:
         # No scheme to put in front of the rest, so nothing to rebuild.
         return href

      host, path = urllib.splithost(request)
      if host is None:
         # The scheme is not followed by "//", so there is no authority to
         # normalise.  Inserting "//" would change the meaning of the URL
         # (e.g. "javascript:f()" would turn into "javascript://f()").
         return protocol + ':' + path

      user, password, port = None, None, None
      if host:
         user, host = urllib.splituser(host)
         if user:
            user, password = urllib.splitpasswd(user)
         host, port = urllib.splitport(host)
         if port: port = int(port)

      parts = [protocol, "://"]
      if user:
         parts.append(urllib.quote(user))
         if password:
            parts.append(':' + urllib.quote(password))
         parts.append('@')
      if host:
         # Internationalised host names are stored in their ASCII (IDNA) form.
         parts.append(host.decode(parser_charset or 'utf-8').encode('idna'))
         if port:
            parts.append(':%d' % port)
      if path:
         parts.append(path)
      return ''.join(parts)
93
94
class Ruler:
   """Tree item that is neither a folder nor a bookmark.

   Folder.walk_depth reports such items through walker.ruler().
   """
   isFolder = isBookmark = 0
98
99
class Walker:
   """Do-nothing base class for objects passed to Folder.walk_depth.

   Deriving from it is optional - Python needs no interface classes - but
   it supplies no-op defaults, so a walker only has to define the callbacks
   it actually cares about.
   """

   def root_folder(self, r):
      """Called for the root folder before its items are walked."""

   def start_folder(self, f, level):
      """Called for a non-root, non-pruned folder before its items."""

   def end_folder(self, f, level):
      """Called for every walked folder after its items."""

   def bookmark(self, b, level):
      """Called for every bookmark."""

   def ruler(self, r, level):
      """Called for every item that is neither a folder nor a bookmark."""

   def prune_folder(self, folder):
      """Return a true value to skip folder and everything inside it.

      The default never prunes.
      """
      return 0
126
127
class Writer(Walker):
   """Walker that remembers an outfile and, optionally, a folder name to prune."""

   def __init__(self, outfile, prune=None):
      self.prune = prune
      self.outfile = outfile

   def prune_folder(self, folder):
      """Tell whether folder is the one to skip (its name equals self.prune)."""
      name_to_prune = self.prune
      return name_to_prune == folder.name
135
136
class Robot:
   """Base class for robots; keeps the log it was constructed with."""

   def __init__(self, log):
      self.log = log

   def stop(self):
      """Clean-up hook; the base class has nothing to clean up."""
      return None
143
144
# Helper class to make inverse links (nodes linked to their parent)
class InverseLinker(Walker):
   """Walker that gives every walked node a "parent" attribute.

   A stack of the folders currently being walked is kept; its top is the
   parent of whatever node is reported next.  The root folder itself gets
   no parent.
   """

   def root_folder(self, r):
      self.parent_stack = [r]

   def _link(self, node):
      # The innermost open folder is the node's parent
      node.parent = self.parent_stack[-1]

   def start_folder(self, f, level):
      self._link(f)
      self.parent_stack.append(f)   # f is the parent of what follows

   def end_folder(self, f, level):
      self.parent_stack.pop()       # back to the enclosing folder

   def bookmark(self, b, level):
      self._link(b)

   def ruler(self, r, level):
      self._link(r)
162
163
# Helper class to make linear representation of the tree
class Linear(Walker):
   """Walker that collects all walked nodes into one flat list.

   The list starts with the root folder and is stored both on the root
   folder (root.linear) and on the walker (self.linear).
   """

   def root_folder(self, r):
      flat = [r]
      r.linear = flat
      self.linear = flat

   def add_object(self, object):
      """Append one node to the flat list."""
      self.linear.append(object)

   def start_folder(self, f, level):
      self.add_object(f)

   def bookmark(self, b, level):
      self.add_object(b)

   def ruler(self, r, level):
      self.add_object(r)
181
182
# Helper - make linked linear representation of the tree, suitable to be stored in sequential storage
def make_linear(root_folder):
   """Link every node to its parent, then flatten the tree.

   Two passes are made over the tree: InverseLinker sets the "parent"
   attributes, Linear stores the flat list in root_folder.linear.
   Nothing is returned.
   """
   for walker_class in (InverseLinker, Linear):
      root_folder.walk_depth(walker_class())
190
191
# Helper, opposite of make_linear - make a tree from the linked linear representation
def make_tree(linear):
   """Rebuild the tree from a linked linear list and return its root.

   The first element of linear is the root folder; it is removed from the
   list.  Every remaining node is appended to its own "parent".
   """
   root_folder = linear.pop(0)

   for node in linear:
      node.parent.append(node)

   return root_folder
201
def break_tree(linear):
   """Drop the first element of linear and remove "parent" from all the others."""
   linear.pop(0)

   for node in linear:
      delattr(node, 'parent')
207
208
def quote_title(title):
   """Return title with apostrophes written as "&#39;" in MOZILLA format.

   For any other BKMK_FORMAT the title is returned unchanged.
   """
   if BKMK_FORMAT != "MOZILLA":
      return title
   return title.replace("'", "&#39;")
213
def unquote_title(title):
   """Return title with HTML entities unescaped in MOZILLA format.

   The title is decoded from utf-8, unescaped as unicode and encoded back to
   utf-8.  For any other BKMK_FORMAT the title is returned unchanged.
   """
   if BKMK_FORMAT != "MOZILLA":
      return title

   from HTMLParser import HTMLParser
   parser = HTMLParser()
   # "&amp;" is resolved first, so an entity escaped twice
   # ("&amp;lt;") gets unescaped as well.
   as_unicode = title.replace("&amp;", '&').decode('utf-8')
   unescaped = parser.unescape(as_unicode)
   return unescaped.encode('utf-8').replace("&#39;", "'")