1 """Simple, strightforward robot based on urllib
3 This file is a part of Bookmarks database and Internet robot.
7 __author__ = "Oleg Broytman <phd@phdru.name>"
8 __copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
9 __license__ = "GNU GPL"
11 __all__ = ['robot_urllib_py3']
20 from Robots.bkmk_robot_base import robot_base, get_error
# Fake to import 'add_headers': give the top-level urllib module the
# attribute names (URLopener, ftpwrapper) that Robots.bkmk_rurllib expects
# to find there -- presumably that module is shared with an older
# urllib-based robot (TODO confirm against Robots.bkmk_rurllib).
urllib.URLopener = urllib.request.URLopener
urllib.ftpwrapper = urllib.request.ftpwrapper
from Robots.bkmk_rurllib import add_headers  # noqa: E402 import not at top
class RedirectException(Exception):
    """Raised on an HTTP redirect; carries the status code and target URL.

    Consumed in robot_urllib_py3.get(), which reads both attributes.
    """

    def __init__(self, errcode, newurl):
        Exception.__init__(self)
        self.errcode = errcode  # numeric HTTP status (301, 302, 303, 307, 308)
        # Bug fix: the original never stored newurl, but the handler in
        # get() reads e.newurl -- that read raised AttributeError.
        self.newurl = newurl
class MyURLopener(urllib.request.URLopener):
    """URLopener that reports redirects via RedirectException and raises
    IOError tuples for HTTP errors (mirroring the Python 2 urllib style)."""

    # Error 301 -- relocated (permanently)
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            # Bug fix: without this branch 'newurl' was unbound and the
            # raise below crashed with NameError on a header-less redirect.
            newurl = None
        raise RedirectException(errcode, newurl)

    # Error 302 -- relocated (temporarily)
    http_error_302 = http_error_301
    # Error 303 -- relocated (see other)
    http_error_303 = http_error_301
    # Error 307 -- relocated (temporarily)
    http_error_307 = http_error_301
    # Error 308 -- relocated (permanently)
    http_error_308 = http_error_301

    # Error 401 -- authentication required
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        raise IOError(
            ('http error', errcode, "Authentication required ", headers))

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        # Release the response object before reporting the error so the
        # connection is not leaked.
        if fp:
            fp.close()
        raise IOError(('http error', errcode, errmsg, headers))

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # urllib.request.urlopen() calls _opener.open(url, data, timeout);
        # accept (and ignore) 'timeout' so this instance can be installed
        # as urllib.request._opener, then delegate to the legacy opener.
        return urllib.request.URLopener.open(self, fullurl, data)
# Install our opener both as the module-global used by get() /
# get_ftp_welcome() and as urllib.request's default opener, so that
# urlretrieve()/urlopen() route through MyURLopener (its open() accepts
# the (url, data, timeout) call urlopen makes).
urllib.request._opener = opener = MyURLopener()
# Keep a reference to the original ftpwrapper class before it is replaced
# with myftpwrapper below.
urllib_ftpwrapper = urllib.request.ftpwrapper
class myftpwrapper(urllib_ftpwrapper):
    """ftpwrapper that records the cache key of the most recent FTP
    connection, so get_ftp_welcome() can look the connection up in
    opener.ftpcache."""

    def __init__(self, user, passwd, host, port, dirs):
        urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs)
        # Bug fix: without the 'global' declaration this assignment created
        # a function-local that was discarded, while get_ftp_welcome()
        # reads the module-global ftpcache_key.
        global ftpcache_key
        ftpcache_key = (user, host, port, '/'.join(dirs))
# Monkey-patch urllib so every new FTP connection is created through
# myftpwrapper and therefore records its cache key.
urllib.request.ftpwrapper = myftpwrapper
87 class robot_urllib_py3(robot_base):
88 def get(self, bookmark, url, accept_charset=False):
90 # Set fake referer to the base URL
91 opener.addheaders[2] = ('Referer', url)
93 if accept_charset and bookmark.charset:
94 opener.addheader('Accept-Charset', bookmark.charset)
96 fname, headers = urllib.request.urlretrieve(url)
98 if accept_charset and bookmark.charset:
99 # Remove Accept-Charset
100 del opener.addheaders[-1]
102 possible_encodings = []
105 sys.getfilesystemencoding(),
108 if encoding and encoding not in possible_encodings:
109 possible_encodings.append(encoding)
111 infile = open(fname, 'rb')
113 content = infile.read()
123 None, None, None, None
125 return None, None, None, headers, content
127 except RedirectException as e:
128 return None, e.errcode, e.newurl, None, None
130 except (OSError, http.client.IncompleteRead) as e:
132 self.log(' Error: %s' % error)
133 return error, None, None, None, None
136 if (e[0] == "http error") and (e[1] == -1):
138 bookmark.no_error = "The server did not return any header - "
139 "it is not an error, actually"
140 self.log(' no headers: %s' % bookmark.no_error)
143 self.log(' Error: %s' % error)
145 return error, None, None, None, None
    def get_ftp_welcome(self):
        """Return the welcome message of the cached FTP connection.

        Looks the connection up in opener.ftpcache using the module-global
        ftpcache_key recorded by myftpwrapper.__init__().
        """
        # NOTE(review): the reviewed excerpt elides neighbouring lines of
        # this method (e.g. a 'global ftpcache_key' declaration and the
        # final 'return') -- verify against version control.
        _welcome = opener.ftpcache[ftpcache_key].ftp.welcome
        # I am assuming there are no duplicate ftp URLs in db.
        # If there are - ftpcache_key in next line is invalid.
155 def finish_check_url(self, bookmark):
156 robot_base.finish_check_url(self, bookmark)
157 urllib.request.urlcleanup()
158 urllib.request._opener = opener