git.phdru.name Git - bookmarks_db.git/blobdiff - bkmk_objects.py
Split hrefs into domain and path components; recode only domain.
[bookmarks_db.git] / bkmk_objects.py
index 00e186c794b8f1a32220c94b69fd7424bc25aeb4..be139edb3e205379733c8e7d38f8db42f2ce4c77 100644 (file)
@@ -16,7 +16,7 @@ __all__ = ['Folder', 'Bookmark', 'Ruler', 'Walker', 'Writer', 'Robot',
 ]
 
 
-import os
+import os, urllib
 
 BKMK_FORMAT = os.environ.get("BKMK_FORMAT", "MOZILLA")
 
@@ -56,14 +56,45 @@ class Bookmark:
    isBookmark = 1
 
    def __init__(self, href, add_date, last_visit=None, last_modified=None,
-         keyword=None, comment='', icon_href=None, icon=None, charset=None):
-      if isinstance(href, str):
-         try:
-            href = href.decode('idna')
-         except UnicodeDecodeError: # Non-ascii href
-            href = href.decode('utf-8')
-      elif not isinstance(href, unicode):
-          raise TypeError("Bookmark's href must be str or unicode, not %r" % type(href))
+         keyword=None, comment='', icon_href=None, icon=None,
+         charset=None, parser_charset=None):
+      protocol, request = urllib.splittype(href)
+      user, password, port = None, None, None
+      host, path = urllib.splithost(request)
+      if host:
+         user, host = urllib.splituser(host)
+         if user:
+            user, password = urllib.splitpasswd(user)
+         host, port = urllib.splitport(host)
+         if port: port = int(port)
+      path, tag = urllib.splittag(path)
+      path, query = urllib.splitquery(path)
+      path = urllib.unquote(path)
+      if tag: tag = urllib.unquote_plus(tag)
+
+      if host: # host can be None for Mozilla's place: URLs
+          host = host.decode(parser_charset or 'utf-8').encode('idna')
+
+      href = protocol + "://"
+      if user:
+         href += urllib.quote(user)
+         if password:
+            href += ':' + urllib.quote(password)
+         href += '@'
+      if host:
+         href += host
+         if port:
+            href += ':%d' % port
+      if path:
+         if protocol == "file":
+            href += urllib.quote(path)
+         else:
+            href += urllib.quote(path)
+      if query:
+         href += '?' + query
+      if tag:
+         href += '#' + urllib.quote_plus(tag)
+
       self.href = href
       self.add_date = add_date
       self.last_visit = last_visit