X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=bin%2FGET.py;h=0e18f5f86e5bca17984227fc8438d78ee723fd3c;hb=6ce130621c1cd274c029c64c30f1c69ec2ac4ac9;hp=f7fddbd46599faddd01845f16a686dafe973657b;hpb=f46bd4d41cc7f243bc8a321effee5200aa69e709;p=dotfiles.git

diff --git a/bin/GET.py b/bin/GET.py
index f7fddbd..0e18f5f 100755
--- a/bin/GET.py
+++ b/bin/GET.py
@@ -1,71 +1,96 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
 
-import sys
-url = sys.argv[1]
+try:
+    PY2 = False
+    from urllib.parse import parse_qsl, urlencode, \
+        quote, quote_plus, unquote, unquote_plus, \
+        splittype, splithost, splituser, splitpasswd, \
+        splitport, splittag, splitquery
+    from urllib.request import urlretrieve
+    import urllib.request
+except ImportError:
+    PY2 = True
+    from cgi import parse_qsl
+    from urllib import urlencode, quote, quote_plus, unquote, unquote_plus, \
+        splittype, splithost, splituser, splitpasswd, \
+        splitport, splittag, splitquery, urlretrieve
+import os
+import sys
 import urllib
 
-urllib._urlopener = urllib.FancyURLopener()
-
-from cgi import parse_qsl
 from m_lib.defenc import default_encoding
 
-protocol, request = urllib.splittype(url)
+url = sys.argv[1]
+if PY2:
+    _urlopener = urllib._urlopener = urllib.FancyURLopener()
+else:
+    _urlopener = urllib.request._opener = urllib.request.FancyURLopener()
+
+protocol, request = splittype(url)
 user, password, port = None, None, None
-host, path = urllib.splithost(request)
+host, path = splithost(request)
 
 if host:
-    user, host = urllib.splituser(host)
-    if user:
-        user, password = urllib.splitpasswd(user)
-    host, port = urllib.splitport(host)
-    if port: port = int(port)
-path, tag = urllib.splittag(path)
-path, query = urllib.splitquery(path)
-path = urllib.unquote(path)
-if tag: tag = urllib.unquote_plus(tag)
+    user, host = splituser(host)
+    if user:
+        user, password = splitpasswd(user)
+    host, port = splitport(host)
+    if port: port = int(port)
+path, tag = splittag(path)
+path, query = splitquery(path)
+path = unquote(path)
+if tag: tag = unquote_plus(tag)
 
 if query:
-    qlist = []
-    for name, value in parse_qsl(query):
-        qlist.append((name, value))
+    qlist = []
+    for name, value in parse_qsl(query):
+        qlist.append((name, value))
 
 url = protocol + "://"
 if user:
-    url += urllib.quote(user)
-    if password:
-        url += ':' + urllib.quote(password)
-    url += '@'
+    url += quote(user)
+    if password:
+        url += ':' + quote(password)
+    url += '@'
 if host:
-    url += host.decode(default_encoding).encode('idna')
-    if port:
-        url += ':%d' % port
+    if PY2:
+        host = host.decode(default_encoding)
+    host = host.encode('idna')
+    if not PY2:
+        host = host.decode('ascii')
+    url += host
+    if port:
+        url += ':%d' % port
 if path:
-    if protocol == "file":
-        url += urllib.quote(path)
-    else:
-        url += urllib.quote(path)
+    if protocol == "file":
+        url += quote(path)
+    else:
+        url += quote(path)
 if query:
-    url += '?' + urllib.urlencode(qlist)
+    url += '?' + urlencode(qlist)
 if tag:
-    url += '#' + urllib.quote_plus(tag)
+    url += '#' + quote_plus(tag)
 
 # I remember seeing some sites that return broken HTML or even HTTP response
 # without "compatible" user agent; I don't know if such sites are still around,
 # but this header doesn't cause any harm so I'd better continue to use it.
 # UPDATE: I saw a number of sites that forbid "Mozilla compatible"
-server_version = "Python-urllib/%s" % urllib.__version__
-urllib._urlopener.addheaders[0] = ('User-agent', server_version)
-urllib._urlopener.addheaders.append(('Accept-Charset', "koi8-r;q=1.0"))
+if PY2:
+    urllib_version = urllib.__version__
+else:
+    urllib_version = urllib.request.__version__
+server_version = "Python-urllib/%s" % urllib_version
 
-import os
-dest_file = os.path.basename(url)
+_urlopener.addheaders[0] = ('User-agent', server_version)
+_urlopener.addheaders.append(('Accept-Charset', "koi8-r;q=1.0"))
+dest_file = os.path.basename(url)
 if not dest_file:
-    dest_file = "_index.html"
+    dest_file = "_index.html"
 
-filename, headers = urllib.urlretrieve(url, dest_file)
+filename, headers = urlretrieve(url, filename=dest_file)
 
-if headers.has_key("last-modified"):
-    from m_lib.net.www.util import parse_time
-    last_modified = parse_time(headers["last-modified"])
-    if last_modified:
-        os.utime(dest_file, (last_modified, last_modified))
+if "last-modified" in headers:
+    from m_lib.net.www.util import parse_time
+    last_modified = parse_time(headers["last-modified"])
+    if last_modified:
+        os.utime(dest_file, (last_modified, last_modified))
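A note on the host-name hunk above, the least obvious part of the port: the encode('idna') call is needed on both interpreters, but only Python 2 has to decode the byte-string host with default_encoding first, and only Python 3 has to decode the punycode bytes back to text at the end. A minimal sketch of the Python 3 path, with a hypothetical Cyrillic host standing in for whatever splithost() returned:

    # Python 3 sketch; "host" is assumed to be an already unquoted text string.
    host = "пример.испытание"
    host = host.encode('idna')    # bytes holding the punycode (xn--...) labels
    host = host.decode('ascii')   # back to text so it can be appended to url
    print(host)

On Python 2 splithost() hands back a byte string, so the patch decodes it with default_encoding before the idna step and skips the final decode.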
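The final hunk keeps the same behaviour, stamping the downloaded file with the server's Last-Modified time, and essentially just replaces has_key() with the in operator. parse_time() comes from the external m_lib package; a rough standard-library equivalent, shown purely as an illustration and not what the script actually uses, would be:

    import calendar
    import email.utils
    import os

    def copy_last_modified(headers, dest_file):
        # "headers" is the message object urlretrieve() returns; HTTP dates are in GMT.
        last_modified = headers.get("last-modified")
        parsed = email.utils.parsedate(last_modified) if last_modified else None
        if parsed:
            timestamp = calendar.timegm(parsed)
            os.utime(dest_file, (timestamp, timestamp))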