X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=bin%2FGET.py;h=2f00d89b2975180179c78681d873d7e66db3d663;hb=refs%2Fheads%2Fmaster;hp=f7fddbd46599faddd01845f16a686dafe973657b;hpb=f46bd4d41cc7f243bc8a321effee5200aa69e709;p=dotfiles.git diff --git a/bin/GET.py b/bin/GET.py index f7fddbd..2f00d89 100755 --- a/bin/GET.py +++ b/bin/GET.py @@ -1,71 +1,68 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 +from urllib.parse import urlsplit, parse_qsl, quote, quote_plus, urlencode +from urllib.request import urlretrieve +import os +import socket import sys -url = sys.argv[1] - -import urllib -urllib._urlopener = urllib.FancyURLopener() - -from cgi import parse_qsl -from m_lib.defenc import default_encoding +import urllib.request -protocol, request = urllib.splittype(url) -user, password, port = None, None, None -host, path = urllib.splithost(request) -if host: - user, host = urllib.splituser(host) - if user: - user, password = urllib.splitpasswd(user) - host, port = urllib.splitport(host) - if port: port = int(port) -path, tag = urllib.splittag(path) -path, query = urllib.splitquery(path) -path = urllib.unquote(path) -if tag: tag = urllib.unquote_plus(tag) +url = sys.argv[1] -if query: - qlist = [] - for name, value in parse_qsl(query): - qlist.append((name, value)) +split_results = urlsplit(url) +protocol, netloc, path, query, tag = split_results +user = split_results.username +password = split_results.password +host = split_results.hostname +port = split_results.port +qlist = parse_qsl(query) url = protocol + "://" if user: - url += urllib.quote(user) - if password: - url += ':' + urllib.quote(password) - url += '@' + url += quote(user) + if password: + url += ':' + quote(password) + url += '@' if host: - url += host.decode(default_encoding).encode('idna') - if port: - url += ':%d' % port + host = host.encode('idna').decode('ascii') + url += host + if port: + url += ':%d' % port if path: - if protocol == "file": - url += urllib.quote(path) - else: - url += urllib.quote(path) + if protocol == "file": + url += quote(path) + else: + url += quote(path) if query: - url += '?' + urllib.urlencode(qlist) + url += '?' + urlencode(qlist) if tag: - url += '#' + urllib.quote_plus(tag) + url += '#' + quote_plus(tag) # I remember seeing some sites that return broken HTML or even HTTP response # without "compatible" user agent; I don't know if such sites are still around, # but this header doesn't cause any harm so I'd better continue to use it. # UPDATE: I saw a number of sites that forbid "Mozilla compatible" -server_version = "Python-urllib/%s" % urllib.__version__ -urllib._urlopener.addheaders[0] = ('User-agent', server_version) -urllib._urlopener.addheaders.append(('Accept-Charset', "koi8-r;q=1.0")) +urllib_version = urllib.request.__version__ +server_version = "Python-urllib/%s" % urllib_version -import os -dest_file = os.path.basename(url) +class MyURLopener(urllib.request.URLopener): + def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + return urllib.request.URLopener.open(self, fullurl, data) + + +_urlopener = urllib.request._opener = MyURLopener() +_urlopener.addheaders[0] = ('User-agent', server_version) +_urlopener.addheaders.append(('Accept-Charset', "koi8-r;q=1.0")) + +dest_file = os.path.basename(url) if not dest_file: - dest_file = "_index.html" + dest_file = "_index.html" -filename, headers = urllib.urlretrieve(url, dest_file) +filename, headers = urlretrieve(url, filename=dest_file) -if headers.has_key("last-modified"): - from m_lib.net.www.util import parse_time - last_modified = parse_time(headers["last-modified"]) - if last_modified: - os.utime(dest_file, (last_modified, last_modified)) +if "last-modified" in headers: + from m_lib.net.www.util import parse_time + last_modified = parse_time(headers["last-modified"]) + if last_modified: + os.utime(dest_file, (last_modified, last_modified))