From 7bc97fc1d57dbae21c1feab20ce112e3be78c634 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Tue, 28 Nov 2023 19:47:38 +0300 Subject: [PATCH] Fix(Py3): Use `urllib.parse.urlsplit()` --- bin/GET.py | 68 ++++++++++++--------------------------- bin/HEAD.py | 67 +++++++++----------------------------- bin/webbrowser-encode-url | 42 +++++++++--------------- bin/wget-download | 39 ++++++++-------------- 4 files changed, 66 insertions(+), 150 deletions(-) diff --git a/bin/GET.py b/bin/GET.py index 0e18f5f..2f00d89 100755 --- a/bin/GET.py +++ b/bin/GET.py @@ -1,49 +1,21 @@ #! /usr/bin/env python3 -try: - PY2 = False - from urllib.parse import parse_qsl, urlencode, \ - quote, quote_plus, unquote, unquote_plus, \ - splittype, splithost, splituser, splitpasswd, \ - splitport, splittag, splitquery - from urllib.request import urlretrieve - import urllib.request -except ImportError: - PY2 = True - from cgi import parse_qsl - from urllib import urlencode, quote, quote_plus, unquote, unquote_plus, \ - splittype, splithost, splituser, splitpasswd, \ - splitport, splittag, splitquery, urlretrieve - +from urllib.parse import urlsplit, parse_qsl, quote, quote_plus, urlencode +from urllib.request import urlretrieve import os +import socket import sys -import urllib -from m_lib.defenc import default_encoding +import urllib.request url = sys.argv[1] -if PY2: - _urlopener = urllib._urlopener = urllib.FancyURLopener() -else: - _urlopener = urllib.request._opener = urllib.request.FancyURLopener() - -protocol, request = splittype(url) -user, password, port = None, None, None -host, path = splithost(request) -if host: - user, host = splituser(host) - if user: - user, password = splitpasswd(user) - host, port = splitport(host) - if port: port = int(port) -path, tag = splittag(path) -path, query = splitquery(path) -path = unquote(path) -if tag: tag = unquote_plus(tag) -if query: - qlist = [] - for name, value in parse_qsl(query): - qlist.append((name, value)) +split_results = urlsplit(url) +protocol, netloc, path, query, tag = split_results +user = split_results.username +password = split_results.password +host = split_results.hostname +port = split_results.port +qlist = parse_qsl(query) url = protocol + "://" if user: @@ -52,11 +24,7 @@ if user: url += ':' + quote(password) url += '@' if host: - if PY2: - host = host.decode(default_encoding) - host = host.encode('idna') - if not PY2: - host = host.decode('ascii') + host = host.encode('idna').decode('ascii') url += host if port: url += ':%d' % port @@ -74,12 +42,16 @@ if tag: # without "compatible" user agent; I don't know if such sites are still around, # but this header doesn't cause any harm so I'd better continue to use it. # UPDATE: I saw a number of sites that forbid "Mozilla compatible" -if PY2: - urllib_version = urllib.__version__ -else: - urllib_version = urllib.request.__version__ +urllib_version = urllib.request.__version__ server_version = "Python-urllib/%s" % urllib_version + +class MyURLopener(urllib.request.URLopener): + def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + return urllib.request.URLopener.open(self, fullurl, data) + + +_urlopener = urllib.request._opener = MyURLopener() _urlopener.addheaders[0] = ('User-agent', server_version) _urlopener.addheaders.append(('Accept-Charset', "koi8-r;q=1.0")) diff --git a/bin/HEAD.py b/bin/HEAD.py index c5d06a6..6c0e5f7 100755 --- a/bin/HEAD.py +++ b/bin/HEAD.py @@ -1,45 +1,19 @@ #! /usr/bin/env python3 -try: - PY2 = False - from http.client import HTTPConnection - from urllib.parse import parse_qsl, urlencode, \ - quote, quote_plus, unquote, unquote_plus, \ - splittype, splithost, splituser, splitpasswd, \ - splitport, splittag, splitquery - import urllib.request -except ImportError: - PY2 = True - from cgi import parse_qsl - from urllib import urlencode, quote, quote_plus, unquote, unquote_plus, \ - splittype, splithost, splituser, splitpasswd, \ - splitport, splittag, splitquery - from httplib import HTTPConnection - +from http.client import HTTPConnection, HTTPSConnection +from urllib.parse import urlsplit, parse_qsl, urlencode, quote, quote_plus import sys -import urllib -from m_lib.defenc import default_encoding +import urllib.request url = sys.argv[1] -protocol, request = splittype(url) -user, password, port = None, None, None -host, path = splithost(request) -if host: - user, host = splituser(host) - if user: - user, password = splitpasswd(user) - host, port = splitport(host) - if port: port = int(port) -path, tag = splittag(path) -path, query = splitquery(path) -path = unquote(path) -if tag: tag = unquote_plus(tag) - -if query: - qlist = [] - for name, value in parse_qsl(query): - qlist.append((name, value)) +split_results = urlsplit(url) +protocol, netloc, path, query, tag = split_results +user = split_results.username +password = split_results.password +host = split_results.hostname +port = split_results.port +qlist = parse_qsl(query) url = protocol + "://" if user: @@ -48,11 +22,7 @@ if user: url += ':' + quote(password) url += '@' if host: - if PY2: - host = host.decode(default_encoding) - host = host.encode('idna') - if not PY2: - host = host.decode('ascii') + host = host.encode('idna').decode('ascii') url += host if port: url += ':%d' % port @@ -66,23 +36,18 @@ if query: if tag: url += '#' + quote_plus(tag) -server = HTTPConnection(host, port) +if protocol == "https": + server = HTTPSConnection(host, port) +else: + server = HTTPConnection(host, port) server.set_debuglevel(1) - server.putrequest("HEAD", path) -if port: - server.putheader("Host", '%s:%d' % (host, port)) -else: - server.putheader("Host", host) # I remember seeing some sites that return broken HTML or even HTTP response # without "compatible" user agent; I don't know if such sites are still around, # but this header doesn't cause any harm so I'd better continue to use it. # UPDATE: I saw a number of sites that forbid "Mozilla compatible" -if PY2: - urllib_version = urllib.__version__ -else: - urllib_version = urllib.request.__version__ +urllib_version = urllib.request.__version__ client_version = "Python-urllib/%s" % urllib_version server.putheader('User-agent', client_version) diff --git a/bin/webbrowser-encode-url b/bin/webbrowser-encode-url index 8c88c9a..e30b6ff 100755 --- a/bin/webbrowser-encode-url +++ b/bin/webbrowser-encode-url @@ -1,17 +1,9 @@ #! /usr/bin/env python3 -try: - from urllib.parse import parse_qsl, urlencode, \ - quote, quote_plus, unquote, unquote_plus, \ - splittype, splithost, splituser, splitpasswd, \ - splitport, splittag, splitquery -except ImportError: - from cgi import parse_qsl - from urllib import urlencode, quote, quote_plus, unquote, unquote_plus, \ - splittype, splithost, splituser, splitpasswd, \ - splitport, splittag, splitquery from getopt import getopt, GetoptError +from urllib.parse import urlsplit, parse_qsl, quote, quote_plus, urlencode import sys + from m_lib.defenc import default_encoding # This must be imported and called before webbrowser @@ -19,13 +11,17 @@ from m_lib.defenc import default_encoding from browser_stack import set_current_browser set_current_browser() -import webbrowser +import webbrowser # noqa: E402 module level import not at top of file + def usage(): - sys.exit('Usage: %s [-e|--encoding=encoding] [-n|--newwin|-t|--tab] URL' % sys.argv[0]) + sys.exit('Usage: %s [-e|--encoding=encoding] [-n|--newwin|-t|--tab] URL' + % sys.argv[0]) + try: - options, arguments = getopt(sys.argv[1:], 'e:nt', ['encoding=', 'newwin', 'tab']) + options, arguments = getopt( + sys.argv[1:], 'e:nt', ['encoding=', 'newwin', 'tab']) except GetoptError: usage() @@ -47,19 +43,13 @@ if not encoding: encoding = default_encoding url = arguments[0] -protocol, request = splittype(url) -user, password, port = None, None, None -host, path = splithost(request) -if host: - user, host = splituser(host) - if user: - user, password = splitpasswd(user) - host, port = splitport(host) - if port: port = int(port) -path, tag = splittag(path) -path, query = splitquery(path) -path = unquote(path) -if tag: tag = unquote_plus(tag) + +split_results = urlsplit(url) +protocol, netloc, path, query, tag = split_results +user = split_results.username +password = split_results.password +host = split_results.hostname +port = split_results.port if query: qlist = [] diff --git a/bin/wget-download b/bin/wget-download index dbd6ef5..a851395 100755 --- a/bin/wget-download +++ b/bin/wget-download @@ -1,22 +1,17 @@ #! /usr/bin/env python3 -try: - from urllib.parse import parse_qsl, urlencode, \ - quote, quote_plus, unquote, unquote_plus, \ - splittype, splithost, splituser, splitpasswd, \ - splitport, splittag, splitquery -except ImportError: - from cgi import parse_qsl - from urllib import urlencode, quote, quote_plus, unquote, unquote_plus, \ - splittype, splithost, splituser, splitpasswd, \ - splitport, splittag, splitquery from getopt import getopt, GetoptError -import os, posixpath +from urllib.parse import urlsplit, parse_qsl, quote, quote_plus, urlencode +import os +import posixpath import sys from m_lib.defenc import default_encoding + def usage(): - sys.exit('Usage: %s [-e|--encoding=encoding] [-n|--newwin|-t|--tab] URL' % sys.argv[0]) + sys.exit('Usage: %s [-e|--encoding=encoding] [-n|--newwin|-t|--tab] URL' + % sys.argv[0]) + try: options, arguments = getopt(sys.argv[1:], 'e:', ['encoding=']) @@ -36,19 +31,13 @@ if not encoding: encoding = default_encoding url = arguments[0] -protocol, request = splittype(url) -user, password, port = None, None, None -host, path = splithost(request) -if host: - user, host = splituser(host) - if user: - user, password = splitpasswd(user) - host, port = splitport(host) - if port: port = int(port) -path, tag = splittag(path) -path, query = splitquery(path) -path = unquote(path) -if tag: tag = unquote_plus(tag) + +split_results = urlsplit(url) +protocol, netloc, path, query, tag = split_results +user = split_results.username +password = split_results.password +host = split_results.hostname +port = split_results.port if query: qlist = [] -- 2.39.5