# Install a FancyURLopener instance as urllib's module-level opener, so that
# the header changes made on urllib._urlopener further down apply to the
# urllib.urlretrieve() call (Python 2 urllib API).
# NOTE(review): `import urllib` itself is outside the visible lines.
7 urllib._urlopener = urllib.FancyURLopener()
9 from cgi import parse_qsl
10 from m_lib.defenc import default_encoding
# Break `url` (assigned outside the visible lines) into its components using
# the Python 2 urllib.split* helpers.
# NOTE(review): the numbers embedded at the start of the lines skip 15 and 17,
# so some original lines -- presumably `if` guards around the user/host
# handling -- are not visible here; confirm against the full script.
12 protocol, request = urllib.splittype(url)
# Defaults for the parts that are only filled in by the splits below.
13 user, password, port = None, None, None
14 host, path = urllib.splithost(request)
# Peel "user[:password]@" off the host, then separate user from password.
16 user, host = urllib.splituser(host)
18 user, password = urllib.splitpasswd(user)
# Separate an optional ":port" suffix and convert it to an integer.
19 host, port = urllib.splitport(host)
20 if port: port = int(port)
# Detach the "#tag" part first, then the "?query" part, leaving the bare path.
21 path, tag = urllib.splittag(path)
22 path, query = urllib.splitquery(path)
# Undo URL quoting here; path and tag are quoted again when the URL is rebuilt.
23 path = urllib.unquote(path)
24 if tag: tag = urllib.unquote_plus(tag)
# Copy the (name, value) pairs that parse_qsl() extracts from the query string
# into `qlist`, which is later passed to urllib.urlencode().
# NOTE(review): the creation of `qlist` is not visible (embedded line numbers
# 25-27 are missing) -- presumably `qlist = []` under an `if query:` guard;
# confirm against the full script.
28 for name, value in parse_qsl(query):
29 qlist.append((name, value))
# Reassemble `url` from the split components, quoting each part again.
# NOTE(review): the embedded line numbers skip 32, 34, 36-37, 39-41, 44, 46,
# 48 and 50, so the conditions under which each part is appended (and any
# '@' / ':port' pieces) are not visible here; confirm against the full script.
31 url = protocol + "://"
33 url += urllib.quote(user)
35 url += ':' + urllib.quote(password)
# Decode the host from the default encoding and re-encode it with the 'idna'
# codec before appending it.
38 url += host.decode(default_encoding).encode('idna')
# NOTE(review): the "file" branch and the other visible path line append the
# identically quoted path.
42 if protocol == "file":
43 url += urllib.quote(path)
45 url += urllib.quote(path)
# Query pairs are re-encoded with urlencode(); the tag with quote_plus().
47 url += '?' + urllib.urlencode(qlist)
49 url += '#' + urllib.quote_plus(tag)
51 # I remember seeing some sites that return broken HTML or even a broken HTTP
52 # response without a "compatible" user agent; I don't know if such sites are
53 # still around, but this header doesn't cause any harm so I'd better continue to use it.
54 # UPDATE: I saw a number of sites that forbid "Mozilla compatible" user agents, so the plain Python-urllib one is set below.
# Build the agent string "Python-urllib/<urllib version>".
55 server_version = "Python-urllib/%s" % urllib.__version__
# Overwrite the first entry of the opener's header list with that User-agent.
56 urllib._urlopener.addheaders[0] = ('User-agent', server_version)
# Additionally send an Accept-Charset header asking for KOI8-R.
57 urllib._urlopener.addheaders.append(('Accept-Charset', "koi8-r;q=1.0"))
# Download `url` into a local file named after the last component of the URL.
# NOTE(review): `import os` is outside the visible lines.
60 dest_file = os.path.basename(url)
# Fallback file name. NOTE(review): presumably used only when the basename is
# empty -- the guard (embedded lines 61-62) is not visible; confirm.
63 dest_file = "_index.html"
# urlretrieve() stores the resource in dest_file and returns the local file
# name together with the response headers.
65 filename, headers = urllib.urlretrieve(url, dest_file)
# When the response carries Last-Modified, set the access and modification
# times of the downloaded file to it.
# NOTE(review): parse_time() is a project helper; presumably it returns a
# timestamp suitable for os.utime() -- confirm (embedded line 70 is missing
# and may check its result).
67 if headers.has_key("last-modified"):
68 from m_lib.net.www.util import parse_time
69 last_modified = parse_time(headers["last-modified"])
71 os.utime(dest_file, (last_modified, last_modified))