-#! /usr/bin/env python
+#! /usr/bin/env python3
+from urllib.parse import urlsplit, parse_qsl, quote, quote_plus, urlencode
+from urllib.request import urlretrieve
+import os
+import socket
import sys
-url = sys.argv[1]
-
-import urllib
-urllib._urlopener = urllib.FancyURLopener()
-
-from cgi import parse_qsl
-from m_lib.defenc import default_encoding
+import urllib.request
-protocol, request = urllib.splittype(url)
-user, password, port = None, None, None
-host, path = urllib.splithost(request)
-if host:
- user, host = urllib.splituser(host)
- if user:
- user, password = urllib.splitpasswd(user)
- host, port = urllib.splitport(host)
- if port: port = int(port)
-path, tag = urllib.splittag(path)
-path, query = urllib.splitquery(path)
-path = urllib.unquote(path)
-if tag: tag = urllib.unquote_plus(tag)
+url = sys.argv[1]
-if query:
- qlist = []
- for name, value in parse_qsl(query):
- qlist.append((name, value))
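+# a single urlsplit() call replaces the Py2 splittype/splithost/splituser/
+# splitpasswd/splitport/splitquery/splittag helper chain removed above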
+split_results = urlsplit(url)
+protocol, netloc, path, query, tag = split_results
+user = split_results.username
+password = split_results.password
+host = split_results.hostname
+port = split_results.port
+qlist = parse_qsl(query)
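+# rebuild the URL from the parsed pieces, re-quoting every component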
url = protocol + "://"
if user:
- url += urllib.quote(user)
- if password:
- url += ':' + urllib.quote(password)
- url += '@'
+ url += quote(user)
+ if password:
+ url += ':' + quote(password)
+ url += '@'
if host:
- url += host.decode(default_encoding).encode('idna')
- if port:
- url += ':%d' % port
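+ # the 'idna' codec (IDNA 2003) turns a non-ASCII hostname into its
+ # ASCII (punycode) form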
+ host = host.encode('idna').decode('ascii')
+ url += host
+ if port:
+ url += ':%d' % port
if path:
- if protocol == "file":
- url += urllib.quote(path)
- else:
- url += urllib.quote(path)
+ # both branches of the old if/else were identical, so quote unconditionally
+ url += quote(path)
if query:
- url += '?' + urllib.urlencode(qlist)
+ url += '?' + urlencode(qlist)
if tag:
- url += '#' + urllib.quote_plus(tag)
+ url += '#' + quote_plus(tag)
# I remember seeing some sites that returned broken HTML or even broken HTTP
# responses without a "compatible" user agent; I don't know if such sites are
# still around, but this header doesn't cause any harm, so I'd better continue
# to use it.
# UPDATE: I have seen a number of sites that forbid "Mozilla compatible"
# user agents.
-server_version = "Python-urllib/%s" % urllib.__version__
-urllib._urlopener.addheaders[0] = ('User-agent', server_version)
-urllib._urlopener.addheaders.append(('Accept-Charset', "koi8-r;q=1.0"))
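+# urllib.request still exposes the Py2-era __version__ attribute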
+urllib_version = urllib.request.__version__
+server_version = "Python-urllib/%s" % urllib_version
-import os
-dest_file = os.path.basename(url)
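+# urlopen() calls _opener.open(url, data, timeout), but the legacy
+# URLopener.open() accepts no timeout argument, so swallow it here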
+class MyURLopener(urllib.request.URLopener):
+ def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+ return urllib.request.URLopener.open(self, fullurl, data)
+
+
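+# install the opener as urllib.request._opener (a private attribute) so that
+# urlretrieve() below routes through it and sends the extra headers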
+_urlopener = urllib.request._opener = MyURLopener()
+_urlopener.addheaders[0] = ('User-agent', server_version)
+_urlopener.addheaders.append(('Accept-Charset', "koi8-r;q=1.0"))
+
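+# save under the URL's last path component; fall back when the URL ends in '/'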
+dest_file = os.path.basename(url)
if not dest_file:
- dest_file = "_index.html"
+ dest_file = "_index.html"
-filename, headers = urllib.urlretrieve(url, dest_file)
+filename, headers = urlretrieve(url, filename=dest_file)
-if headers.has_key("last-modified"):
- from m_lib.net.www.util import parse_time
- last_modified = parse_time(headers["last-modified"])
- if last_modified:
- os.utime(dest_file, (last_modified, last_modified))
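+# set the saved file's atime/mtime from the server's Last-Modified header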
+if "last-modified" in headers:
+ from m_lib.net.www.util import parse_time
+ last_modified = parse_time(headers["last-modified"])
+ if last_modified:
+ os.utime(dest_file, (last_modified, last_modified))