git.phdru.name Git - bookmarks_db.git/commitdiff
Fix(bkmk_rcurl): IDNA-encode URLs 5.4.1
author Oleg Broytman <phd@phdru.name>
Mon, 5 Aug 2024 12:56:23 +0000 (15:56 +0300)
committer Oleg Broytman <phd@phdru.name>
Mon, 5 Aug 2024 13:07:18 +0000 (16:07 +0300)
PycURL doesn't encode URLs itself
and requires URLs to be in ASCII encoding.

Robots/bkmk_rcurl.py
doc/ANNOUNCE
doc/ChangeLog

index e4ce184c8e14aecc907f52668aa8b7775aa12596..bbbc9d12cab82bae385b62dd9b8df57c2da96862 100644 (file)
@@ -11,6 +11,9 @@ __license__ = "GNU GPL"
 __all__ = ['robot_curl']
 
 
+from urllib.parse import urlsplit, parse_qsl, quote, quote_plus, urlencode
+
+from m_lib.defenc import default_encoding
 import certifi
 import pycurl
 
@@ -53,6 +56,10 @@ class robot_curl(robot_base):
 
         curl.setopt(pycurl.HTTPGET, 1)
         curl.setopt(pycurl.HTTPHEADER, headers)
+        try:
+            url.encode('ascii')
+        except UnicodeEncodeError:
+            url = encode_url(url, bookmark.charset)
         curl.setopt(pycurl.URL, url)
         try:
             curl.perform()
@@ -89,3 +96,57 @@ class robot_curl(robot_base):
 
     def get_ftp_welcome(self):
         return ''  # We doen't store welcome message yet
+
+
+def encode_url(url, encoding):
+    """Return `url` rebuilt as a pure-ASCII string.
+
+    The host name is IDNA-encoded; user info, path, query and fragment
+    are percent-encoded from their `encoding` byte representation.
+    Percent-escapes already present in user info, path and fragment
+    are kept as is instead of being escaped a second time.
+
+    url -- the URL as `str`; may contain non-ASCII characters.
+    encoding -- charset used to turn non-ASCII characters into bytes
+        before percent-encoding; `default_encoding` is used when it
+        is empty or None.
+
+    Errors raised by `urlsplit()`, by reading `.port`, or by the codecs
+    (e.g. UnicodeError for a host that is not valid IDNA) propagate
+    to the caller.
+    """
+    if not encoding:
+        encoding = default_encoding
+
+    split_results = urlsplit(url)
+    protocol, netloc, path, query, tag = split_results
+    user = split_results.username
+    password = split_results.password
+    host = split_results.hostname
+    port = split_results.port
+
+    result = protocol + "://"
+    if user:
+        # '%' is listed as safe so that input which is already
+        # percent-encoded is not encoded twice ('%40' -> '%2540').
+        result += quote(user.encode(encoding), safe='/%')
+        if password:
+            result += ':' + quote(password.encode(encoding), safe='/%')
+        result += '@'
+    if host:
+        host = host.encode('idna').decode('ascii')
+        if ':' in host:
+            # `hostname` drops the brackets of an IPv6 literal;
+            # put them back or the port separator becomes ambiguous.
+            host = '[%s]' % host
+        result += host
+        if port:
+            result += ':%d' % port
+    if path:
+        if protocol == "file":
+            # quote() encodes `str` as UTF-8, regardless of `encoding`.
+            result += quote(path, safe='/%')
+        else:
+            result += quote(path.encode(encoding), safe='/%')
+    if query:
+        # keep_blank_values: without it parameters such as 'x=' would be
+        # silently dropped from the URL (a bare 'flag' becomes 'flag=').
+        qlist = [
+            (name.encode(encoding), value.encode(encoding))
+            for name, value in parse_qsl(query, keep_blank_values=True)
+        ]
+        result += '?' + urlencode(qlist)
+    if tag:
+        result += '#' + quote_plus(tag.encode(encoding), safe='%')
+
+    return result
index ddfb31f46f71a452faa894caa91d4a926648d8cd..aec114b8db3e63feb1f683f6cf9f886837fc43f2 100644 (file)
@@ -9,6 +9,9 @@ WHAT'S NEW
 
 Version 5.4.1 (2024-08-04)
 
+   Fix(bkmk_rcurl): IDNA-encode URLs. PycURL doesn't encode URLs itself
+   and requires URLs to be in ASCII encoding.
+
    Separate connection timeout for PycURL robot.
    Also will be used for aiohttp.
 
index 57481c0ef8242aca473d8f550f44a0d0d4d091fd..da565471f2bf940d0d04e9a19bf15a5cbb4a2532 100644 (file)
@@ -1,5 +1,8 @@
 Version 5.4.1 (2024-08-04)
 
+   Fix(bkmk_rcurl): IDNA-encode URLs. PycURL doesn't encode URLs itself
+   and requires URLs to be in ASCII encoding.
+
    Separate connection timeout for PycURL robot.
    Also will be used for aiohttp.