self.outfile.write(ind_s*level + "</DL><p>\n")
def bookmark(self, b, level):
- self.outfile.write(ind_s*(level+1) + '<DT><A HREF="%s" ADD_DATE="%s"' % (b.href.encode(default_encoding), b.add_date))
+ self.outfile.write(ind_s*(level+1) + '<DT><A HREF="%s" ADD_DATE="%s"' % (b.href, b.add_date))
if b.last_visit: self.outfile.write(' LAST_VISIT="%s"' % b.last_visit)
self.outfile.write(' LAST_MODIFIED="%s"' % b.last_modified)
if BKMK_FORMAT == "MOZILLA":
]
-import os
+import os, urllib
BKMK_FORMAT = os.environ.get("BKMK_FORMAT", "MOZILLA")
isBookmark = 1
def __init__(self, href, add_date, last_visit=None, last_modified=None,
- keyword=None, comment='', icon_href=None, icon=None, charset=None):
- if isinstance(href, str):
- try:
- href = href.decode('idna')
- except UnicodeDecodeError: # Non-ascii href
- href = href.decode('utf-8')
- elif not isinstance(href, unicode):
- raise TypeError("Bookmark's href must be str or unicode, not %r" % type(href))
+ keyword=None, comment='', icon_href=None, icon=None,
+ charset=None, parser_charset=None):
+ protocol, request = urllib.splittype(href)
+ user, password, port = None, None, None
+ host, path = urllib.splithost(request)
+ if host:
+ user, host = urllib.splituser(host)
+ if user:
+ user, password = urllib.splitpasswd(user)
+ host, port = urllib.splitport(host)
+ if port: port = int(port)
+ path, tag = urllib.splittag(path)
+ path, query = urllib.splitquery(path)
+ path = urllib.unquote(path)
+ if tag: tag = urllib.unquote_plus(tag)
+
+ if host: # host can be None for Mozilla's place: URLs
+ host = host.decode(parser_charset or 'utf-8').encode('idna')
+
+ href = protocol + "://"
+ if user:
+ href += urllib.quote(user)
+ if password:
+ href += ':' + urllib.quote(password)
+ href += '@'
+ if host:
+ href += host
+ if port:
+ href += ':%d' % port
+ if path:
+ if protocol == "file":
+ href += urllib.quote(path)
+ else:
+ href += urllib.quote(path)
+ if query:
+ href += '?' + query
+ if tag:
+ href += '#' + urllib.quote_plus(tag)
+
self.href = href
self.add_date = add_date
self.last_visit = last_visit
debug("Bookmark points to: `%s'" % href)
bookmark = Bookmark(href, add_date, last_visit, last_modified,
- keyword=keyword, icon=icon, charset=charset)
+ keyword=keyword, icon=icon,
+ charset=charset, parser_charset=self.charset or default_encoding)
self.current_object = bookmark
self.current_folder.append(bookmark)
self.urls += 1
A set of classes, libraries, programs and plugins I use to manipulate my
bookmarks.html.
-WHAT'S NEW in version 4.4.0 (2011-01-07).
+WHAT'S NEW in version 4.5.0 (2011-??-??).
-Moved BeautifulSoup.py and subproc.py from Robots/ to the top-level directory.
+Recode internationalized domain names (IDN): host names in bookmark URLs are converted to their ASCII-compatible IDNA form.
-Moved parse_html.py and its submodules to a separate parse_html package.
-
-Added statistics code to parse_html, got a statistics on parser
-success/failrure rate, reordered parsers.
-
-Removed old cruft.
-
-
-WHAT'S NEW in version 4.3.1 (2011-01-03).
-
-Get favicon before HTML redirect (refresh).
-
-Get favicon even if it's of a wrong type; many sites return favicon as
-text/plain or application/*; the only exception is text/html which is usually
-an error page instead of error 404.
-
-
-WHAT'S NEW in version 4.3.0 (2011-01-01).
-
-Robots no longer have one global temporary file - there are at least two
-(html and favicon), and in the future there will be more for
-asynchronous robot(s) that will test many URLs in parallel.
WHERE TO GET