git.phdru.name Git - dotfiles.git/commitdiff
Fix(Py3): Use `urllib.parse.urlsplit()`
author Oleg Broytman <phd@phdru.name>
Tue, 28 Nov 2023 16:47:38 +0000 (19:47 +0300)
committer Oleg Broytman <phd@phdru.name>
Thu, 30 Nov 2023 20:27:55 +0000 (23:27 +0300)
bin/GET.py
bin/HEAD.py
bin/webbrowser-encode-url
bin/wget-download

index 0e18f5f86e5bca17984227fc8438d78ee723fd3c..2f00d89b2975180179c78681d873d7e66db3d663 100755 (executable)
@@ -1,49 +1,21 @@
 #! /usr/bin/env python3
 
-try:
-    PY2 = False
-    from urllib.parse import parse_qsl, urlencode, \
-        quote, quote_plus, unquote, unquote_plus, \
-        splittype, splithost, splituser, splitpasswd, \
-        splitport, splittag, splitquery
-    from urllib.request import urlretrieve
-    import urllib.request
-except ImportError:
-    PY2 = True
-    from cgi import parse_qsl
-    from urllib import urlencode, quote, quote_plus, unquote, unquote_plus, \
-        splittype, splithost, splituser, splitpasswd, \
-        splitport, splittag, splitquery, urlretrieve
-
+from urllib.parse import urlsplit, parse_qsl, quote, quote_plus, urlencode
+from urllib.request import urlretrieve
 import os
+import socket
 import sys
-import urllib
-from m_lib.defenc import default_encoding
+import urllib.request
 
 url = sys.argv[1]
-if PY2:
-    _urlopener = urllib._urlopener = urllib.FancyURLopener()
-else:
-    _urlopener = urllib.request._opener = urllib.request.FancyURLopener()
-
-protocol, request = splittype(url)
-user, password, port = None, None, None
-host, path = splithost(request)
-if host:
-    user, host = splituser(host)
-    if user:
-        user, password = splitpasswd(user)
-    host, port = splitport(host)
-    if port: port = int(port)
-path, tag = splittag(path)
-path, query = splitquery(path)
-path = unquote(path)
-if tag: tag = unquote_plus(tag)
 
-if query:
-    qlist = []
-    for name, value in parse_qsl(query):
-        qlist.append((name, value))
+split_results = urlsplit(url)
+protocol, netloc, path, query, tag = split_results
+user = split_results.username
+password = split_results.password
+host = split_results.hostname
+port = split_results.port
+qlist = parse_qsl(query)
 
 url = protocol + "://"
 if user:
@@ -52,11 +24,7 @@ if user:
         url += ':' + quote(password)
     url += '@'
 if host:
-    if PY2:
-        host = host.decode(default_encoding)
-    host = host.encode('idna')
-    if not PY2:
-        host = host.decode('ascii')
+    host = host.encode('idna').decode('ascii')
     url += host
     if port:
         url += ':%d' % port
@@ -74,12 +42,16 @@ if tag:
 # without "compatible" user agent; I don't know if such sites are still around,
 # but this header doesn't cause any harm so I'd better continue to use it.
 # UPDATE: I saw a number of sites that forbid "Mozilla compatible"
-if PY2:
-    urllib_version = urllib.__version__
-else:
-    urllib_version = urllib.request.__version__
+urllib_version = urllib.request.__version__
 server_version = "Python-urllib/%s" % urllib_version
 
+
+class MyURLopener(urllib.request.URLopener):
+    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+        return urllib.request.URLopener.open(self, fullurl, data)
+
+
+_urlopener = urllib.request._opener = MyURLopener()
 _urlopener.addheaders[0] = ('User-agent', server_version)
 _urlopener.addheaders.append(('Accept-Charset', "koi8-r;q=1.0"))
 
index c5d06a646e669ee657021565c438296852947560..6c0e5f7eb0d3bd30dddf2ad7d64dd8b2608fb5ff 100755 (executable)
@@ -1,45 +1,19 @@
 #! /usr/bin/env python3
 
-try:
-    PY2 = False
-    from http.client import HTTPConnection
-    from urllib.parse import parse_qsl, urlencode, \
-        quote, quote_plus, unquote, unquote_plus, \
-        splittype, splithost, splituser, splitpasswd, \
-        splitport, splittag, splitquery
-    import urllib.request
-except ImportError:
-    PY2 = True
-    from cgi import parse_qsl
-    from urllib import urlencode, quote, quote_plus, unquote, unquote_plus, \
-        splittype, splithost, splituser, splitpasswd, \
-        splitport, splittag, splitquery
-    from httplib import HTTPConnection
-
+from http.client import HTTPConnection, HTTPSConnection
+from urllib.parse import urlsplit, parse_qsl, urlencode, quote, quote_plus
 import sys
-import urllib
-from m_lib.defenc import default_encoding
+import urllib.request
 
 url = sys.argv[1]
 
-protocol, request = splittype(url)
-user, password, port = None, None, None
-host, path = splithost(request)
-if host:
-    user, host = splituser(host)
-    if user:
-        user, password = splitpasswd(user)
-    host, port = splitport(host)
-    if port: port = int(port)
-path, tag = splittag(path)
-path, query = splitquery(path)
-path = unquote(path)
-if tag: tag = unquote_plus(tag)
-
-if query:
-    qlist = []
-    for name, value in parse_qsl(query):
-        qlist.append((name, value))
+split_results = urlsplit(url)
+protocol, netloc, path, query, tag = split_results
+user = split_results.username
+password = split_results.password
+host = split_results.hostname
+port = split_results.port
+qlist = parse_qsl(query)
 
 url = protocol + "://"
 if user:
@@ -48,11 +22,7 @@ if user:
         url += ':' + quote(password)
     url += '@'
 if host:
-    if PY2:
-        host = host.decode(default_encoding)
-    host = host.encode('idna')
-    if not PY2:
-        host = host.decode('ascii')
+    host = host.encode('idna').decode('ascii')
     url += host
     if port:
         url += ':%d' % port
@@ -66,23 +36,18 @@ if query:
 if tag:
     url += '#' + quote_plus(tag)
 
-server = HTTPConnection(host, port)
+if protocol == "https":
+    server = HTTPSConnection(host, port)
+else:
+    server = HTTPConnection(host, port)
 server.set_debuglevel(1)
-
 server.putrequest("HEAD", path)
-if port:
-    server.putheader("Host", '%s:%d' % (host, port))
-else:
-    server.putheader("Host", host)
 
 # I remember seeing some sites that return broken HTML or even HTTP response
 # without "compatible" user agent; I don't know if such sites are still around,
 # but this header doesn't cause any harm so I'd better continue to use it.
 # UPDATE: I saw a number of sites that forbid "Mozilla compatible"
-if PY2:
-    urllib_version = urllib.__version__
-else:
-    urllib_version = urllib.request.__version__
+urllib_version = urllib.request.__version__
 client_version = "Python-urllib/%s" % urllib_version
 server.putheader('User-agent', client_version)
 
index 8c88c9a6fadadfa435544edeabc5d5d43cf70d8c..e30b6ff26d496d86ac49d8d627653216be927a92 100755 (executable)
@@ -1,17 +1,9 @@
 #! /usr/bin/env python3
 
-try:
-    from urllib.parse import parse_qsl, urlencode, \
-        quote, quote_plus, unquote, unquote_plus, \
-        splittype, splithost, splituser, splitpasswd, \
-        splitport, splittag, splitquery
-except ImportError:
-    from cgi import parse_qsl
-    from urllib import urlencode, quote, quote_plus, unquote, unquote_plus, \
-        splittype, splithost, splituser, splitpasswd, \
-        splitport, splittag, splitquery
 from getopt import getopt, GetoptError
+from urllib.parse import urlsplit, parse_qsl, quote, quote_plus, urlencode
 import sys
+
 from m_lib.defenc import default_encoding
 
 # This must be imported and called before webbrowser
@@ -19,13 +11,17 @@ from m_lib.defenc import default_encoding
 from browser_stack import set_current_browser
 set_current_browser()
 
-import webbrowser
+import webbrowser  # noqa: E402 module level import not at top of file
+
 
 def usage():
-    sys.exit('Usage: %s [-e|--encoding=encoding] [-n|--newwin|-t|--tab] URL' % sys.argv[0])
+    sys.exit('Usage: %s [-e|--encoding=encoding] [-n|--newwin|-t|--tab] URL'
+             % sys.argv[0])
+
 
 try:
-    options, arguments = getopt(sys.argv[1:], 'e:nt', ['encoding=', 'newwin', 'tab'])
+    options, arguments = getopt(
+        sys.argv[1:], 'e:nt', ['encoding=', 'newwin', 'tab'])
 except GetoptError:
     usage()
 
@@ -47,19 +43,13 @@ if not encoding:
     encoding = default_encoding
 
 url = arguments[0]
-protocol, request = splittype(url)
-user, password, port = None, None, None
-host, path = splithost(request)
-if host:
-    user, host = splituser(host)
-    if user:
-        user, password = splitpasswd(user)
-    host, port = splitport(host)
-    if port: port = int(port)
-path, tag = splittag(path)
-path, query = splitquery(path)
-path = unquote(path)
-if tag: tag = unquote_plus(tag)
+
+split_results = urlsplit(url)
+protocol, netloc, path, query, tag = split_results
+user = split_results.username
+password = split_results.password
+host = split_results.hostname
+port = split_results.port
 
 if query:
     qlist = []
index dbd6ef5bfd4cf13390630223ac3f093740ccd86a..a851395a6b36aa8d610c146663beea21712ba78c 100755 (executable)
@@ -1,22 +1,17 @@
 #! /usr/bin/env python3
 
-try:
-    from urllib.parse import parse_qsl, urlencode, \
-        quote, quote_plus, unquote, unquote_plus, \
-        splittype, splithost, splituser, splitpasswd, \
-        splitport, splittag, splitquery
-except ImportError:
-    from cgi import parse_qsl
-    from urllib import urlencode, quote, quote_plus, unquote, unquote_plus, \
-        splittype, splithost, splituser, splitpasswd, \
-        splitport, splittag, splitquery
 from getopt import getopt, GetoptError
-import os, posixpath
+from urllib.parse import urlsplit, parse_qsl, quote, quote_plus, urlencode
+import os
+import posixpath
 import sys
 from m_lib.defenc import default_encoding
 
+
 def usage():
-    sys.exit('Usage: %s [-e|--encoding=encoding] [-n|--newwin|-t|--tab] URL' % sys.argv[0])
+    sys.exit('Usage: %s [-e|--encoding=encoding] [-n|--newwin|-t|--tab] URL'
+             % sys.argv[0])
+
 
 try:
     options, arguments = getopt(sys.argv[1:], 'e:', ['encoding='])
@@ -36,19 +31,13 @@ if not encoding:
     encoding = default_encoding
 
 url = arguments[0]
-protocol, request = splittype(url)
-user, password, port = None, None, None
-host, path = splithost(request)
-if host:
-    user, host = splituser(host)
-    if user:
-        user, password = splitpasswd(user)
-    host, port = splitport(host)
-    if port: port = int(port)
-path, tag = splittag(path)
-path, query = splitquery(path)
-path = unquote(path)
-if tag: tag = unquote_plus(tag)
+
+split_results = urlsplit(url)
+protocol, netloc, path, query, tag = split_results
+user = split_results.username
+password = split_results.password
+host = split_results.hostname
+port = split_results.port
 
 if query:
     qlist = []