1 #! /usr/bin/env python3
5 from http.client import HTTPConnection
6 from urllib.parse import parse_qsl, urlencode, \
7 quote, quote_plus, unquote, unquote_plus, \
8 splittype, splithost, splituser, splitpasswd, \
9 splitport, splittag, splitquery
13 from cgi import parse_qsl
14 from urllib import urlencode, quote, quote_plus, unquote, unquote_plus, \
15 splittype, splithost, splituser, splitpasswd, \
16 splitport, splittag, splitquery
17 from httplib import HTTPConnection
21 from m_lib.defenc import default_encoding
# Break the URL apart: the scheme first, then host and path from the rest.
25 protocol, request = splittype(url)
# Optional authority components default to None until they are parsed.
26 user, password, port = None, None, None
27 host, path = splithost(request)
# Peel the optional "user[:password]@" credentials off the host part.
# NOTE(review): whatever conditions guard these two steps are not visible
# here - confirm they are skipped when host/user is empty.
29 user, host = splituser(host)
31 user, password = splitpasswd(user)
# Separate the port from the host; the port arrives as text, so convert it.
32 host, port = splitport(host)
33 if port: port = int(port)
# Strip the "#fragment" first, then the "?query", leaving the bare path.
34 path, tag = splittag(path)
35 path, query = splitquery(path)
# Undo URL-quoting of the fragment (unquote_plus also turns "+" into a space).
37 if tag: tag = unquote_plus(tag)
# Collect the parsed query parameters as (name, value) pairs in qlist.
41 for name, value in parse_qsl(query):
42 qlist.append((name, value))
# Reassemble the URL from its parsed components, re-quoting each one.
44 url = protocol + "://"
# The password follows a ':' separator and is percent-quoted.
48 url += ':' + quote(password)
# Convert the host name to its IDNA (ASCII-compatible) form.
# NOTE(review): the decode from default_encoding and the decode back from
# ASCII look like Python 2 / Python 3 alternatives; their guards are not
# visible here - confirm.
52 host = host.decode(default_encoding)
53 host = host.encode('idna')
55 host = host.decode('ascii')
# "file" URLs get special handling (the body of this branch is not shown).
60 if protocol == "file":
# Append the re-encoded query string and the re-quoted fragment.
65 url += '?' + urlencode(qlist)
67 url += '#' + quote_plus(tag)
# Prepare an HTTP HEAD request for the parsed host, port and path.
69 server = HTTPConnection(host, port)
# Debug level 1 makes the HTTP client print the traffic it sends/receives.
70 server.set_debuglevel(1)
72 server.putrequest("HEAD", path)
# Host header, with or without an explicit port.
# NOTE(review): these two lines are presumably the branches of a check on
# `port` that is not visible here - confirm only one of them runs.
74 server.putheader("Host", '%s:%d' % (host, port))
76 server.putheader("Host", host)
78 # I remember seeing some sites that return broken HTML or even HTTP response
79 # without "compatible" user agent; I don't know if such sites are still around,
80 # but this header doesn't cause any harm so I'd better continue to use it.
81 # UPDATE: I saw a number of sites that forbid "Mozilla compatible"
# Identify the client as "Python-urllib/<version>"; the version is read from
# urllib or urllib.request (presumably Python 2 vs Python 3 - confirm).
83 urllib_version = urllib.__version__
85 urllib_version = urllib.request.__version__
86 client_version = "Python-urllib/%s" % urllib_version
87 server.putheader('User-agent', client_version)
# Tell the server that the koi8-r charset is preferred for the response.
89 server.putheader('Accept-Charset', "koi8-r;q=1.0")