"""Simple, straightforward robot based on urllib
3 This file is a part of Bookmarks database and Internet robot.
7 __author__ = "Oleg Broytman <phd@phdru.name>"
8 __copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
9 __license__ = "GNU GPL"
11 __all__ = ['robot_urllib']
16 from Robots.bkmk_robot_base import robot_base, request_headers, get_error
class RedirectException(Exception):
    """Raised for HTTP 3xx responses; carries the status code and target URL."""

    def __init__(self, errcode, newurl):
        Exception.__init__(self)
        self.errcode = errcode  # HTTP status code (301, 302, 303, 307, 308)
        # BUG FIX: newurl was accepted but never stored, although callers
        # read e.newurl (see robot_urllib.get's redirect handler).
        self.newurl = newurl
class MyURLopener(urllib.URLopener):
    """URLopener that turns HTTP redirects into RedirectException and
    all other HTTP errors into IOError, so the robot can report them."""

    # Error 301 -- relocated (permanently)
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            # BUG FIX: newurl was left unbound when the server sent neither
            # Location nor URI, raising NameError instead of reporting the
            # redirect; signal "no target" explicitly.
            newurl = None
        raise RedirectException(errcode, newurl)

    # Error 302 -- relocated (temporarily)
    http_error_302 = http_error_301
    # Error 303 -- relocated (see other)
    http_error_303 = http_error_301
    # Error 307 -- relocated (temporarily)
    http_error_307 = http_error_301
    # Error 308 -- relocated (permanently)
    http_error_308 = http_error_301

    # Error 401 -- authentication required
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        # BUG FIX: the `raise IOError(` prefix was missing, leaving a bare
        # tuple expression that silently did nothing.
        raise IOError(
            ('http error', errcode, "Authentication required ", headers))

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        # Drain and close the response body so the connection is not leaked
        # before signalling the error to the caller.
        if fp:
            fp.read()
            fp.close()
        raise IOError(('http error', errcode, errmsg, headers))
def add_headers(opener):
    """Install the shared request_headers onto *opener*.

    The User-Agent replaces the opener's default header (slot 0);
    every other header is appended.
    """
    # BUG FIX: popping from the shared module-level request_headers dict
    # destroyed the User-Agent entry for any later call; work on a copy.
    headers = dict(request_headers)
    _user_agent = headers.pop('User-Agent')
    opener.addheaders[0] = ('User-Agent', _user_agent)
    for h, v in headers.items():
        opener.addheader(h, v)
# Install our opener as urllib's module-wide opener so that
# urllib.urlretrieve() in robot_urllib.get goes through MyURLopener.
urllib._urlopener = opener = MyURLopener()

# Keep a reference to the original ftpwrapper before it is patched below.
urllib_ftpwrapper = urllib.ftpwrapper
class myftpwrapper(urllib_ftpwrapper):
    """ftpwrapper that records the cache key of the most recent FTP
    connection so get_ftp_welcome() can look it up in opener.ftpcache."""

    def __init__(self, user, passwd, host, port, dirs):
        urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs)
        # BUG FIX: without the global declaration this assignment created a
        # function-local variable and the module-level key read by
        # get_ftp_welcome() was never set.
        global ftpcache_key
        ftpcache_key = (user, host, port, '/'.join(dirs))
# Monkey-patch urllib so every FTP connection records its cache key.
urllib.ftpwrapper = myftpwrapper
class robot_urllib(robot_base):
    """Robot that fetches URLs via urllib, translating redirects and
    HTTP errors raised by MyURLopener into the base robot's protocol."""

    def get(self, bookmark, url, accept_charset=False):
        """Fetch *url*.

        Returns a 5-tuple (error, redirect_code, redirect_url, headers,
        content); exactly one "branch" of the tuple is populated.
        """
        try:
            # Set fake referer to the base URL
            opener.addheaders[2] = ('Referer', url)

            if accept_charset and bookmark.charset:
                opener.addheader('Accept-Charset', bookmark.charset)
            try:
                fname, headers = urllib.urlretrieve(url)
            finally:
                if accept_charset and bookmark.charset:
                    # Remove Accept-Charset
                    del opener.addheaders[-1]

            # FIX: close the temporary file deterministically instead of
            # leaking the handle until garbage collection.
            with open(fname, 'rt') as infile:
                content = infile.read()

            return None, None, None, headers, content

        except RedirectException as e:
            return None, e.errcode, e.newurl, None, None

        except IOError as e:
            if (e[0] == "http error") and (e[1] == -1):
                error = None
                # BUG FIX: the message was split across two statements, so
                # the second string literal was a dead no-op expression and
                # the stored message was truncated.
                bookmark.no_error = ("The server did not return any header - "
                                     "it is not an error, actually")
                self.log(' no headers: %s' % bookmark.no_error)
            else:
                error = get_error(e)
                self.log(' Error: %s' % error)

            return error, None, None, None, None
122 def get_ftp_welcome(self):
124 _welcome = opener.ftpcache[ftpcache_key].ftp.welcome
125 # I am assuming there are no duplicate ftp URLs in db.
126 # If there are - ftpcache_key in next line is invalid.
    def finish_check_url(self, bookmark):
        # Delegate the post-check bookkeeping to the base class.
        # NOTE(review): the method body may continue beyond this chunk
        # (e.g. urllib cleanup) — confirm against the full file.
        robot_base.finish_check_url(self, bookmark)