]> git.phdru.name Git - bookmarks_db.git/blob - bkmk_objects.py
Removed svn:keywords. Extended copyright to 2012.
[bookmarks_db.git] / bkmk_objects.py
1 """Objects to represent bookmarks.html structure
2
3 This file is a part of Bookmarks database and Internet robot.
4 """
5
6 __author__ = "Oleg Broytman <phd@phdru.name>"
7 __copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
8 __license__ = "GNU GPL"
9
10 __all__ = ['Folder', 'Bookmark', 'Ruler', 'Walker', 'Writer', 'Robot',
11     'InverseLinker', 'Linear', 'make_linear', 'make_tree', 'break_tree',
12     'quote_title', 'unquote_title',
13 ]
14
15
16 import os, urllib
17
# Bookmarks file flavour, taken from the environment; "MOZILLA" when unset.
BKMK_FORMAT = os.getenv("BKMK_FORMAT", "MOZILLA")
19
class Folder(list):
   """A bookmarks folder: a list that holds its child objects.

   Children are classified through their ``isFolder`` / ``isBookmark``
   flags; a child with both flags false is reported to the walker as a
   ruler.
   """
   isFolder = 1
   isBookmark = 0

   def __init__(self, add_date=None, comment='', last_modified=None):
      """Create an empty folder and remember its metadata as attributes."""
      super(Folder, self).__init__()
      self.add_date = add_date
      self.last_modified = last_modified
      self.comment = comment

   def walk_depth(self, walker, level=0):
      """Traverse the folder depth-first, reporting every node to walker.

      A folder that carries a ``header`` attribute is the root folder: it
      is announced with ``walker.root_folder`` and is never pruned.  Any
      other folder is first offered to ``walker.prune_folder``; a true
      answer skips the folder completely (no start/end callbacks, no
      children).  Children are reported with the level of the folder
      that contains them; nested folders are walked at ``level+1``.
      """
      if hasattr(self, "header"): # root folder
         walker.root_folder(self)
      else:
         if walker.prune_folder(self):
            return
         walker.start_folder(self, level)

      for child in self:
         if child.isFolder:
            child.walk_depth(walker, level+1)
         elif child.isBookmark:
            walker.bookmark(child, level)
         else:
            walker.ruler(child, level)

      walker.end_folder(self, level)
49
50
51 class Bookmark:
52    isFolder = 0
53    isBookmark = 1
54
55    def __init__(self, href, add_date, last_visit=None, last_modified=None,
56          keyword=None, comment='', icon_href=None, icon=None,
57          charset=None, parser_charset=None):
58       protocol, request = urllib.splittype(href)
59       user, password, port = None, None, None
60       host, path = urllib.splithost(request)
61       if host:
62          user, host = urllib.splituser(host)
63          if user:
64             user, password = urllib.splitpasswd(user)
65          host, port = urllib.splitport(host)
66          if port: port = int(port)
67
68       href = protocol + "://"
69       if user:
70          href += urllib.quote(user)
71          if password:
72             href += ':' + urllib.quote(password)
73          href += '@'
74       if host:
75          href += host.decode(parser_charset or 'utf-8').encode('idna')
76          if port:
77             href += ':%d' % port
78       if path:
79          href += path
80
81       self.href = href
82       self.add_date = add_date
83       self.last_visit = last_visit
84       self.last_modified = last_modified
85       self.keyword = keyword
86       self.comment = comment
87       self.icon_href = icon_href
88       self.icon = icon
89       self.charset = charset
90
91
class Ruler:
   """Separator between bookmarks: neither a folder nor a bookmark."""
   isFolder, isBookmark = 0, 0
95
96
class Walker:
   """
      Interface class for objects passed to Folder.walk_depth.

      Deriving from it is optional - Python, unlike Java, does not need
      interface classes - but it is a convenient way to inherit no-op
      versions of the callbacks you do not want to implement yourself
      (end_folder etc.).
   """

   def root_folder(self, r):
      """Called for the root folder; does nothing by default."""

   def start_folder(self, f, level):
      """Called when a non-root folder is entered; does nothing by default."""

   def end_folder(self, f, level):
      """Called after a folder's children were walked; no-op by default."""

   def bookmark(self, b, level):
      """Called for every bookmark; does nothing by default."""

   def ruler(self, r, level):
      """Called for every ruler; does nothing by default."""

   def prune_folder(self, folder):
      """Say whether the folder must be skipped; never by default."""
      return 0
123
124
class Writer(Walker):
   """Walker that remembers an output file and an optional folder to prune."""

   def __init__(self, outfile, prune=None):
      """Store the output file and the name of the folder to skip."""
      self.outfile, self.prune = outfile, prune

   def prune_folder(self, folder):
      """Return true when the folder's name equals the configured prune name."""
      is_pruned = (self.prune == folder.name)
      return is_pruned
132
133
class Robot:
   """Base robot: keeps the log object handed to the constructor."""

   def __init__(self, log):
      """Remember the log object."""
      self.log = log

   def stop(self):
      """Cleanup hook; the base class has nothing to clean up."""
      return None
140
141
# Helper class to make inverse links (nodes linked to their parent)
class InverseLinker(Walker):
   """Walker that gives every visited node a ``parent`` attribute.

   A stack of the folders currently being walked is kept; its top is the
   parent of whatever node is reported next.
   """

   def root_folder(self, r):
      """Start a fresh stack holding only the root folder."""
      self.parent_stack = [r]

   def _link(self, node):
      """Point node.parent at the folder on top of the stack."""
      node.parent = self.parent_stack[-1]

   def start_folder(self, f, level):
      """Link the folder to its parent, then make it the current parent."""
      self._link(f)
      self.parent_stack.append(f) # Push the folder onto the stack of parents

   def end_folder(self, f, level):
      """Leave the folder: drop the top of the stack."""
      self.parent_stack.pop()     # Pop off the stack

   def bookmark(self, b, level):
      """Link the bookmark to the current parent folder."""
      self._link(b)

   def ruler(self, r, level):
      """Link the ruler to the current parent folder."""
      self._link(r)
159
160
# Helper class to make linear representation of the tree
class Linear(Walker):
   """Walker that collects every visited node into one flat list.

   The list starts with the root folder and is stored both on the walker
   and on the root folder as the ``linear`` attribute.
   """

   def root_folder(self, r):
      """Create the list, seeded with the root, and share it with the root."""
      self.linear = r.linear = [r]

   def add_object(self, object):
      """Append one node to the flat list."""
      flat = self.linear
      flat.append(object)

   def start_folder(self, f, level):
      """Record a folder in the order it is entered."""
      self.add_object(f)

   def bookmark(self, b, level):
      """Record a bookmark."""
      self.add_object(b)

   def ruler(self, r, level):
      """Record a ruler."""
      self.add_object(r)
178
179
# Helper - make linked linear representation of the tree, suitable to be stored in sequential storage
def make_linear(root_folder):
   """Walk the tree twice: first with InverseLinker, then with Linear.

   The first pass sets ``parent`` links on the nodes, the second one
   builds the flat list that Linear stores on the root folder.
   """
   for walker_class in (InverseLinker, Linear):
      root_folder.walk_depth(walker_class())
187
188
# Helper, opposite of make_linear - make a tree from the linked linear representation
def make_tree(linear):
   """Rebuild the tree from a linked linear list and return its root.

   The first item is the root folder; it is removed from the list (the
   list is modified in place).  Every remaining item is appended to the
   folder its ``parent`` attribute points to, in list order.
   """
   root_folder = linear.pop(0)

   for node in linear:
      parent = node.parent
      parent.append(node)

   return root_folder
198
def break_tree(linear):
   """Drop the first item of the list and remove every ``parent`` link.

   The list is modified in place; each remaining item loses its
   ``parent`` attribute.
   """
   linear.pop(0)

   for node in linear:
      delattr(node, "parent")
204
205
def quote_title(title):
   """Escape apostrophes as ``&#39;`` when the format is MOZILLA.

   For any other BKMK_FORMAT the title is returned unchanged.
   """
   if BKMK_FORMAT != "MOZILLA":
      return title
   return title.replace("'", "&#39;")
210
def unquote_title(title):
   """Resolve HTML escapes in a title when the format is MOZILLA.

   ``&amp;`` is turned into ``&`` first, then the utf-8 decoded text is
   passed through HTMLParser.unescape, re-encoded to utf-8, and any
   remaining ``&#39;`` is replaced by an apostrophe.  For any other
   BKMK_FORMAT the title is returned unchanged.
   """
   if BKMK_FORMAT != "MOZILLA":
      return title

   from HTMLParser import HTMLParser
   parser = HTMLParser()
   decoded = title.replace("&amp;", '&').decode('utf-8')
   unescaped = parser.unescape(decoded)
   return unescaped.encode('utf-8').replace("&#39;", "'")
216    return title